pax_global_header00006660000000000000000000000064147023155060014515gustar00rootroot0000000000000052 comment=5f20c4770b6ca280999e553c54afb58f03bd9c4f trace-cmd-v3.3.1/000077500000000000000000000000001470231550600135465ustar00rootroot00000000000000trace-cmd-v3.3.1/CODING_STYLE000066400000000000000000000160401470231550600154150ustar00rootroot00000000000000 trace-cmd coding-style ====================== The coding style of trace-cmd and the tracing libraries (libtracefs and libtraceevent) are very similar to the Linux kernel coding style: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/process/coding-style.rst Indentation =========== Tabs are used for the start of indentation (the '\t' character), and should be set to 8 characters. Spaces may be used at the end for continued lines where having the start of text line up to braces in the previous line is not divisible by 8. Max line width ============== All lines should not be more than 100 characters in length. This is a guide, as readability is more important than breaking lines up into a hard limit. Ideally, strings should never be broken up except for where a new line is added. printf("This is a line that may continue for a very long string.\n" "This is another line, but after a new line\n"); But line breaks should not be: printf("This is a line that may continue for a very" "long string.\n This is another line," "but after a new line\n"); Not only is the above not as readable as the first version, it is not even equivalent, because it is missing spaces between the line breaks. For this reason, finish the string on the same line, even if that string breaks the 100 character limit. Brackets and braces =================== For all conditionals, the braces start on the same line: if (cond) { } And the ending brace is at the same indentation as the conditional. 
while (cond) { } do { } while (cond); for (i = 0; i < 10; i++) { } The same is true for structures: struct my_struct { int field; }; But for functions, the braces should start on the following line: void my_function(void) { } It is also fine to not use braces for simple conditionals and loops. if (!x) y = x; else y = 1; for (i = 0; i < 10; i++) foo(i); while (getline(&line, &size, fp) > 0) printf("%s", line); But any complex or multiline conditional or loop should have braces even if it is allowed not to by the C language. if (x) { for (i = 0; i < 10; i++) foo(i); } else { foo(1); } Notice above that even though the else portion is simple, it too has braces as the else and if blocks should match. If one is required to have braces, they both should have braces. Spaces ====== A single space should be used between C commands and their starting parenthesis. if (x) for (i = 0; i < 10; i++) while (getline(&line, &size, fp) > 0) There should be no space between functions or macros and the starting parenthesis. foo(x) IS_VALID(y) This includes prototypes and declarations. void foo(int x) A space should be before and after assignment, comparison and arithmetic signs. i = 0; if (i < 10) if (i == 5) y = i + 10; i += 5; For structures, use tabs to make all the fields line up nicely. struct { int foo; int bar; unsigned long long time; }; Variable declarations ===================== The order of variables that are declared should first keep the same types together, but also should be ordered by their length such that the variables are ordered in an "upside-down Christmas tree" fashion where the length gets smaller. int tracecmd_count_cpus(void) { static int once; char buf[1024]; int cpus = 0; char *pbuf; size_t *pn; FILE *fp; size_t n; int r; The above shows that the order is done by length, and in the above example it also shows that "int cpus = 0;" is not grouped next to "int r;". 
As this is more of a guideline and made to be more aesthetic to the eye of the reader, both the above and the below are acceptable. int tracecmd_count_cpus(void) { static int once; char buf[1024]; char *pbuf; size_t *pn; FILE *fp; size_t n; int cpus = 0; int r; Unless variables are tightly related, it is expected that each variable be on its own line and not grouped by type. That is, int r, cpus = 0; is to be discouraged, as the two variables are not related to each other. But if you had a bunch of counters: int i, j, k; That would be fine, as the variables are all related as they are all for the same purpose (arbitrary counters). The same may go with pointers: char *begin, *end; Comments ======== Comments will use the "/* */" format and the C++ "//" style is discouraged. If a comment is on one line, keep the "/*" and "*/" on the same line: /* This is a single line comment. */ If a comment spans more than one line, then have the "/*" on a separate line before the comment and the "*/" on a separate line at the end of the comment, and each line starts with a "*" where all the "*" line up with each other. /* * This is a multi line comment, where all the '*' * will line up, and the text is on a separate line * from the start and end markers. */ Function documentation ====================== All global functions (and especially any APIs) should have a function description in the form of "kernel doc": https://www.kernel.org/doc/html/latest/doc-guide/kernel-doc.html The form is: /** * function_name() - Brief description of function. * @arg1: Describe the first argument. * @arg2: Describe the second argument. * One can provide multiple line descriptions * for arguments. * * A longer description, with more discussion of the function function_name() * that might be useful to those using or modifying it. Begins with an * empty comment line, and may include additional embedded empty * comment lines. * * The longer description may have multiple paragraphs. 
* * Context: Describes whether the function can sleep, what locks it takes, * releases, or expects to be held. It can extend over multiple * lines. * Return: Describe the return value of function_name. * * The return value description can also have multiple paragraphs, and should * be placed at the end of the comment block. */ Structure layout ================ This is more about compaction than coding style. When creating structures, be aware that if the fields are placed together without regard to their alignment, the compiler will create "holes" in them. struct { int x; char y; unsigned long long f; }; An int is 4 bytes in length, a char is one byte, and an unsigned long long is 8 bytes. The compiler will try to naturally align them by their size, and will include padding (holes) inside the structure to do so. The above is equivalent to: struct { int x; char y; char padding[3]; unsigned long long f; }; It is best to try to organize the structure where there are no holes within them. struct { unsigned long long f; int x; char y; }; The above is better formatting, even if there may be padding outside the structure, but the compiler will still have more flexibility to utilize the space outside the structure than what it can do within it. General ======= As stated, this is a guide and may not be strictly enforced. The goal is to have consistent and readable code. In general, try to have the coding style match the surrounding code. trace-cmd-v3.3.1/CONTRIBUTE000066400000000000000000000106371470231550600151560ustar00rootroot00000000000000If you would like to become part of the community and submit patches, here's how to do so for trace-cmd. 
If you only want to report a bug, or suggest an enhancement, you may do so at: https://bugzilla.kernel.org/buglist.cgi?component=Trace-cmd%2FKernelshark All development is done via a mailing list: http://vger.kernel.org/vger-lists.html#linux-trace-devel Patches should be sent to linux-trace-devel@vger.kernel.org Start by cloning the official repository: git clone git://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git Make your changes. When you are satisfied with them, commit them into git. Here are some helpful hints for your git commits. 1) When making changes, please follow the coding style defined by the file called CODING_STYLE in this directory. 2) Every commit should only do one thing. That is, if your work requires some cleaning up of code, do that clean up as a separate commit and not with your functional changes. Find ways to take "steps" in modifying code. If you can break up your changes in a series of steps, do so. 3) The commit log should start with a title. Like the below: trace-cmd: Add CONTRIBUTE file Even though this repo is for trace-cmd, start the topic with "trace-cmd:" because the commits will end up as patches to a mailing list that handles other tracing repos, so differentiating them with the subject is useful. You can be more specific as well. If the change only affects the "record" command, you may start the title with "trace-cmd record:". 4) The body of the commit (with a blank line from the title), should be self contained, and explain why you are making the change. The title should hold the "what" is changing, but the body contains the rationale for the change. It should stand alone, and not state things like "See the next patch", because when it is in git history, there's no knowing what the next patch is. You can make statements like "This is needed for a <feature> that will come later". Where "<feature>" is something that you are working on and the current commit is one of the steps required to get there. 
5) Add your Developer Certificate of Origin (DCO) at the bottom of the commit log. That is "Signed-off-by: Full Name <email>" where your full name is your real name (no pseudonyms). Optionally, if you are making the change on behalf of your company, you may also add your company name, if you are not using your company's email. "Signed-off-by: Full Name (Company) <email>". Please note, the DCO is your statement that you have the legal right to make these changes for the project you are submitting to. You can use the Linux kernel "checkpatch.pl" script to help verify the formatting of your patch: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/scripts/checkpatch.pl Please note that checkpatch.pl is a guide and not a hard rule. If it reports a fix that makes the code harder to read, that fix can probably be ignored. git format-patch --stdout HEAD~1..HEAD | ./checkpatch.pl Finally, you can use the git "send-email" functionality: git send-email --from='<your-email>' --to='linux-trace-devel@vger.kernel.org' HEAD~1..HEAD The above is for sending a single patch. If you are sending more than one patch, also include a cover letter: git send-email --cover-letter --annotate --from='<your-email>' --to='linux-trace-devel@vger.kernel.org' <first-commit>~1..HEAD If you receive feedback on your patches, and plan on sending another version, please use the '-v' option to mark your patches that they are a new version. For example, if you add "-v2" to the above commands, instead of having: "[PATCH]" in the subject, it will have "[PATCH v2]", letting the reviewers know that this is a new version. If you send another version, use "-v3" and so on. For more information about git send-email: https://git-scm.com/docs/git-send-email To keep track of the status of patches that have been submitted, check out: https://patchwork.kernel.org/project/linux-trace-devel/list/ If you would like to apply patches from the mailing list, you can use the "b4" utility. 
$ pip install b4 Then from the mailing list archive, find a message id from a patch or patch series. For example, to get the patch from: https://lore.kernel.org/linux-trace-devel/20210205173713.132051-1-tz.stoyanov@gmail.com/ $ b4 am -o - 20210205173713.132051-1-tz.stoyanov@gmail.com > /tmp/p.mbox $ git am /tmp/p.mbox trace-cmd-v3.3.1/COPYING000066400000000000000000000004141470231550600146000ustar00rootroot00000000000000There are two main licenses that the tools in this directory are covered under. For the applications themselves, they are covered under GPL-2.0 (see LICENSES/GPL-2.0). As for the exported headers and libraries, they are covered under LGPL-2.1 (see LICENSES/LGPL-2.1). trace-cmd-v3.3.1/COPYING.LIB000066400000000000000000000636511470231550600152210ustar00rootroot00000000000000 GNU LESSER GENERAL PUBLIC LICENSE Version 2.1, February 1999 Copyright (C) 1991, 1999 Free Software Foundation, Inc. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. [This is the first released version of the Lesser GPL. It also counts as the successor of the GNU Library Public License, version 2, hence the version number 2.1.] Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public Licenses are intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This license, the Lesser General Public License, applies to some specially designated software packages--typically libraries--of the Free Software Foundation and other authors who decide to use it. You can use it too, but we suggest you first think carefully about whether this license or the ordinary General Public License is the better strategy to use in any particular case, based on the explanations below. 
When we speak of free software, we are referring to freedom of use, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish); that you receive source code or can get it if you want it; that you can change the software and use pieces of it in new free programs; and that you are informed that you can do these things. To protect your rights, we need to make restrictions that forbid distributors to deny you these rights or to ask you to surrender these rights. These restrictions translate to certain responsibilities for you if you distribute copies of the library or if you modify it. For example, if you distribute copies of the library, whether gratis or for a fee, you must give the recipients all the rights that we gave you. You must make sure that they, too, receive or can get the source code. If you link other code with the library, you must provide complete object files to the recipients, so that they can relink them with the library after making changes to the library and recompiling it. And you must show them these terms so they know their rights. We protect your rights with a two-step method: (1) we copyright the library, and (2) we offer you this license, which gives you legal permission to copy, distribute and/or modify the library. To protect each distributor, we want to make it very clear that there is no warranty for the free library. Also, if the library is modified by someone else and passed on, the recipients should know that what they have is not the original version, so that the original author's reputation will not be affected by problems that might be introduced by others. ^L Finally, software patents pose a constant threat to the existence of any free program. We wish to make sure that a company cannot effectively restrict the users of a free program by obtaining a restrictive license from a patent holder. 
Therefore, we insist that any patent license obtained for a version of the library must be consistent with the full freedom of use specified in this license. Most GNU software, including some libraries, is covered by the ordinary GNU General Public License. This license, the GNU Lesser General Public License, applies to certain designated libraries, and is quite different from the ordinary General Public License. We use this license for certain libraries in order to permit linking those libraries into non-free programs. When a program is linked with a library, whether statically or using a shared library, the combination of the two is legally speaking a combined work, a derivative of the original library. The ordinary General Public License therefore permits such linking only if the entire combination fits its criteria of freedom. The Lesser General Public License permits more lax criteria for linking other code with the library. We call this license the "Lesser" General Public License because it does Less to protect the user's freedom than the ordinary General Public License. It also provides other free software developers Less of an advantage over competing non-free programs. These disadvantages are the reason we use the ordinary General Public License for many libraries. However, the Lesser license provides advantages in certain special circumstances. For example, on rare occasions, there may be a special need to encourage the widest possible use of a certain library, so that it becomes a de-facto standard. To achieve this, non-free programs must be allowed to use the library. A more frequent case is that a free library does the same job as widely used non-free libraries. In this case, there is little to gain by limiting the free library to free software only, so we use the Lesser General Public License. In other cases, permission to use a particular library in non-free programs enables a greater number of people to use a large body of free software. 
For example, permission to use the GNU C Library in non-free programs enables many more people to use the whole GNU operating system, as well as its variant, the GNU/Linux operating system. Although the Lesser General Public License is Less protective of the users' freedom, it does ensure that the user of a program that is linked with the Library has the freedom and the wherewithal to run that program using a modified version of the Library. The precise terms and conditions for copying, distribution and modification follow. Pay close attention to the difference between a "work based on the library" and a "work that uses the library". The former contains code derived from the library, whereas the latter must be combined with the library in order to run. ^L GNU LESSER GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License Agreement applies to any software library or other program which contains a notice placed by the copyright holder or other authorized party saying it may be distributed under the terms of this Lesser General Public License (also called "this License"). Each licensee is addressed as "you". A "library" means a collection of software functions and/or data prepared so as to be conveniently linked with application programs (which use some of those functions and data) to form executables. The "Library", below, refers to any such software library or work which has been distributed under these terms. A "work based on the Library" means either the Library or any derivative work under copyright law: that is to say, a work containing the Library or a portion of it, either verbatim or with modifications and/or translated straightforwardly into another language. (Hereinafter, translation is included without limitation in the term "modification".) "Source code" for a work means the preferred form of the work for making modifications to it. 
For a library, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the library. Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running a program using the Library is not restricted, and output from such a program is covered only if its contents constitute a work based on the Library (independent of the use of the Library in a tool for writing it). Whether that is true depends on what the Library does and what the program that uses the Library does. 1. You may copy and distribute verbatim copies of the Library's complete source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and distribute a copy of this License along with the Library. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Library or any portion of it, thus forming a work based on the Library, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) The modified work must itself be a software library. b) You must cause the files modified to carry prominent notices stating that you changed the files and the date of any change. c) You must cause the whole of the work to be licensed at no charge to all third parties under the terms of this License. 
d) If a facility in the modified Library refers to a function or a table of data to be supplied by an application program that uses the facility, other than as an argument passed when the facility is invoked, then you must make a good faith effort to ensure that, in the event an application does not supply such function or table, the facility still operates, and performs whatever part of its purpose remains meaningful. (For example, a function in a library to compute square roots has a purpose that is entirely well-defined independent of the application. Therefore, Subsection 2d requires that any application-supplied function or table used by this function must be optional: if the application does not supply it, the square root function must still compute square roots.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Library, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Library, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Library. In addition, mere aggregation of another work not based on the Library with the Library (or with a work based on the Library) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. 
You may opt to apply the terms of the ordinary GNU General Public License instead of this License to a given copy of the Library. To do this, you must alter all the notices that refer to this License, so that they refer to the ordinary GNU General Public License, version 2, instead of to this License. (If a newer version than version 2 of the ordinary GNU General Public License has appeared, then you can specify that version instead if you wish.) Do not make any other change in these notices. ^L Once this change is made in a given copy, it is irreversible for that copy, so the ordinary GNU General Public License applies to all subsequent copies and derivative works made from that copy. This option is useful when you wish to copy part of the code of the Library into a program that is not a library. 4. You may copy and distribute the Library (or a portion or derivative of it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange. If distribution of object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place satisfies the requirement to distribute the source code, even though third parties are not compelled to copy the source along with the object code. 5. A program that contains no derivative of any portion of the Library, but is designed to work with the Library by being compiled or linked with it, is called a "work that uses the Library". Such a work, in isolation, is not a derivative work of the Library, and therefore falls outside the scope of this License. 
However, linking a "work that uses the Library" with the Library creates an executable that is a derivative of the Library (because it contains portions of the Library), rather than a "work that uses the library". The executable is therefore covered by this License. Section 6 states terms for distribution of such executables. When a "work that uses the Library" uses material from a header file that is part of the Library, the object code for the work may be a derivative work of the Library even though the source code is not. Whether this is true is especially significant if the work can be linked without the Library, or if the work is itself a library. The threshold for this to be true is not precisely defined by law. If such an object file uses only numerical parameters, data structure layouts and accessors, and small macros and small inline functions (ten lines or less in length), then the use of the object file is unrestricted, regardless of whether it is legally a derivative work. (Executables containing this object code plus portions of the Library will still fall under Section 6.) Otherwise, if the work is a derivative of the Library, you may distribute the object code for the work under the terms of Section 6. Any executables containing that work also fall under Section 6, whether or not they are linked directly with the Library itself. ^L 6. As an exception to the Sections above, you may also combine or link a "work that uses the Library" with the Library to produce a work containing portions of the Library, and distribute that work under terms of your choice, provided that the terms permit modification of the work for the customer's own use and reverse engineering for debugging such modifications. You must give prominent notice with each copy of the work that the Library is used in it and that the Library and its use are covered by this License. You must supply a copy of this License. 
If the work during execution displays copyright notices, you must include the copyright notice for the Library among them, as well as a reference directing the user to the copy of this License. Also, you must do one of these things: a) Accompany the work with the complete corresponding machine-readable source code for the Library including whatever changes were used in the work (which must be distributed under Sections 1 and 2 above); and, if the work is an executable linked with the Library, with the complete machine-readable "work that uses the Library", as object code and/or source code, so that the user can modify the Library and then relink to produce a modified executable containing the modified Library. (It is understood that the user who changes the contents of definitions files in the Library will not necessarily be able to recompile the application to use the modified definitions.) b) Use a suitable shared library mechanism for linking with the Library. A suitable mechanism is one that (1) uses at run time a copy of the library already present on the user's computer system, rather than copying library functions into the executable, and (2) will operate properly with a modified version of the library, if the user installs one, as long as the modified version is interface-compatible with the version that the work was made with. c) Accompany the work with a written offer, valid for at least three years, to give the same user the materials specified in Subsection 6a, above, for a charge no more than the cost of performing this distribution. d) If distribution of the work is made by offering access to copy from a designated place, offer equivalent access to copy the above specified materials from the same place. e) Verify that the user has already received a copy of these materials or that you have already sent this user a copy. 
For an executable, the required form of the "work that uses the Library" must include any data and utility programs needed for reproducing the executable from it. However, as a special exception, the materials to be distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. It may happen that this requirement contradicts the license restrictions of other proprietary libraries that do not normally accompany the operating system. Such a contradiction means you cannot use both them and the Library together in an executable that you distribute. ^L 7. You may place library facilities that are a work based on the Library side-by-side in a single library together with other library facilities not covered by this License, and distribute such a combined library, provided that the separate distribution of the work based on the Library and of the other library facilities is otherwise permitted, and provided that you do these two things: a) Accompany the combined library with a copy of the same work based on the Library, uncombined with any other library facilities. This must be distributed under the terms of the Sections above. b) Give prominent notice with the combined library of the fact that part of it is a work based on the Library, and explaining where to find the accompanying uncombined form of the same work. 8. You may not copy, modify, sublicense, link with, or distribute the Library except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense, link with, or distribute the Library is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 9. 
You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Library or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Library (or any work based on the Library), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Library or works based on it. 10. Each time you redistribute the Library (or any work based on the Library), the recipient automatically receives a license from the original licensor to copy, distribute, link with or modify the Library subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties with this License. ^L 11. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Library at all. For example, if a patent license would not permit royalty-free redistribution of the Library by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Library. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply, and the section as a whole is intended to apply in other circumstances. 
It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 12. If the distribution and/or use of the Library is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Library under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 13. The Free Software Foundation may publish revised and/or new versions of the Lesser General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Library specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Library does not specify a license version number, you may choose any version ever published by the Free Software Foundation. ^L 14. 
If you wish to incorporate parts of the Library into other free programs whose distribution conditions are incompatible with these, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS ^L How to Apply These Terms to Your New Libraries If you develop a new library, and you want it to be of the greatest possible use to the public, we recommend making it free software that everyone can redistribute and change. 
You can do so by permitting redistribution under these terms (or, alternatively, under the terms of the ordinary General Public License). To apply these terms, attach the following notices to the library. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Also add information on how to contact you by electronic and paper mail. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the library, if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the library `Frob' (a library for tweaking knobs) written by James Random Hacker. , 1 April 1990 Ty Coon, President of Vice That's all there is to it! 
trace-cmd-v3.3.1/DCO000066400000000000000000000041041470231550600140750ustar00rootroot00000000000000 (Copied from the Linux Kernel's Documentation/process/submitting-patches.rst) Sign your work - the Developer's Certificate of Origin ------------------------------------------------------ The sign-off is a simple line at the end of the explanation for the patch, which certifies that you wrote it or otherwise have the right to pass it on as an open-source patch. The rules are pretty simple: if you can certify the below: Developer's Certificate of Origin 1.1 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ By making a contribution to this project, I certify that: (a) The contribution was created in whole or in part by me and I have the right to submit it under the open source license indicated in the file; or (b) The contribution is based upon previous work that, to the best of my knowledge, is covered under an appropriate open source license and I have the right under that license to submit that work with modifications, whether created in whole or in part by me, under the same open source license (unless I am permitted to submit under a different license), as indicated in the file; or (c) The contribution was provided directly to me by some other person who certified (a), (b) or (c) and I have not modified it. (d) I understand and agree that this project and the contribution are public and that a record of the contribution (including all personal information I submit with it, including my sign-off) is maintained indefinitely and may be redistributed consistent with this project or the open source license(s) involved. then you just add a line saying:: Signed-off-by: Random J Developer using your real name (sorry, no pseudonyms or anonymous contributions.) Some people also put extra tags at the end. They'll just be ignored for now, but you can do this to mark internal company procedures or just point out some special detail about the sign-off. 
trace-cmd-v3.3.1/Documentation/000077500000000000000000000000001470231550600163575ustar00rootroot00000000000000trace-cmd-v3.3.1/Documentation/Makefile000066400000000000000000000044601470231550600200230ustar00rootroot00000000000000# SPDX-License-Identifier: GPL-2.0 doc_dir:=$(src)/Documentation export doc_dir SUBDIR += trace-cmd SUBDIR += libtracecmd .PHONY: $(SUBDIR) DOCDIR = $(src)/Documentation ASCIIDOC=asciidoc ASCIIDOC_CONF = $(DOCDIR)/asciidoc.conf ASCIIDOC_EXTRA = --unsafe -f $(ASCIIDOC_CONF) ASCIIDOC_HTML = xhtml11 MANPAGE_XSL = $(DOCDIR)/manpage-normal.xsl XMLTO_EXTRA = INSTALL?=install RM ?= rm -f ASCIIDOC_INSTALLED := $(shell command -v $(ASCIIDOC) 2> /dev/null) ifndef ASCIIDOC_INSTALLED missing_tools += $(ASCIIDOC) endif XMLTO=xmlto XMLTO_INSTALLED := $(shell command -v $(XMLTO) 2> /dev/null) ifndef XMLTO_INSTALLED missing_tools += $(XMLTO) endif # # For asciidoc ... # -7.1.2, no extra settings are needed. # 8.0-, set ASCIIDOC8. # # # For docbook-xsl ... # -1.68.1, set ASCIIDOC_NO_ROFF? (based on changelog from 1.73.0) # 1.69.0, no extra settings are needed? # 1.69.1-1.71.0, set DOCBOOK_SUPPRESS_SP? # 1.71.1, no extra settings are needed? # 1.72.0, set DOCBOOK_XSL_172. # 1.73.0-, set ASCIIDOC_NO_ROFF # # # If you had been using DOCBOOK_XSL_172 in an attempt to get rid # of 'the ".ft C" problem' in your generated manpages, and you # instead ended up with weird characters around callouts, try # using ASCIIDOC_NO_ROFF instead (it works fine with ASCIIDOC8). # ifdef ASCIIDOC8 ASCIIDOC_EXTRA += -a asciidoc7compatible endif ifdef DOCBOOK_XSL_172 ASCIIDOC_EXTRA += -a libtracecmd-asciidoc-no-roff MANPAGE_XSL = $(DOCDIR)/manpage-1.72.xsl else ifdef ASCIIDOC_NO_ROFF # docbook-xsl after 1.72 needs the regular XSL, but will not # pass-thru raw roff codes from asciidoc.conf, so turn them off. 
ASCIIDOC_EXTRA += -a libtracecmd-asciidoc-no-roff endif endif ifdef MAN_BOLD_LITERAL XMLTO_EXTRA += -m $(DOCDIR)/manpage-bold-literal.xsl endif ifdef DOCBOOK_SUPPRESS_SP XMLTO_EXTRA += -m $(DOCDIR)/manpage-suppress-sp.xsl endif ifdef USE_ASCIIDOCTOR ASCIIDOC = asciidoctor ASCIIDOC_EXTRA = -a compat-mode ASCIIDOC_EXTRA += -I. -rasciidoctor-extensions ASCIIDOC_HTML = xhtml5 endif ifneq ($(findstring $(MAKEFLAGS),w),w) PRINT_DIR = --no-print-directory else # "make -w" NO_SUBDIR = : endif export ASCIIDOC ASCIIDOC_CONF ASCIIDOC_EXTRA ASCIIDOC_HTML export MANPAGE_XSL export XMLTO XMLTO_INSTALLED XMLTO_EXTRA export missing_tools export RM all: $(SUBDIR) clean: $(SUBDIR) install: $(SUBDIR) $(SUBDIR): make -C $@ $(MAKECMDGOALS) trace-cmd-v3.3.1/Documentation/README.PythonPlugin000066400000000000000000000067431470231550600217100ustar00rootroot00000000000000 PYTHON PLUGIN DOCUMENTATION ============================= With the python plugin (make python-plugin) you can now write plugins in python. The API exported by the python plugin itself (written in C) allows you to access most information about a record from python. To write a python plugin, put a new .py file into a new ~/.trace-cmd/python/ directory. The most basic python plugin is this: --- %< --- def register(pevent): pass --- >% --- which obviously does nothing at all. To register a callback, use the pevent.register_event_handler function: --- %< --- import tracecmd def my_event_handler(trace_seq, event): pass def register(pevent): pevent.register_event_handler("subsys", "event_name", my_event_handler) --- >% --- There are four object types that you get, described below. tracecmd.PEvent ----------------- This is the class of the 'pevent' object above, you get one of those via your register callback. 
It has one method and one property: * register_event_handler() - example above, to register an event handler function * file_endian - either '<' or '>' indicating which endianness the file has, to be used with struct.unpack() tracecmd.TraceSeq ------------------- This is the class of the 'trace_seq' parameter to your callback function. It has only one method, puts(), to put data into the buffer. Formatting must be done in python. tracecmd.Event ---------------------- This is the class of the 'event' parameter to your callback function. Note that it doesn't just contain the format, but also the event data. As such, you can do much with this, and this is what you'll usually use. Each instance of this allows access to record items via the dict protocol, and you can get the items via its keys() methods. So for example, your callback could be --- %< --- def my_callback(trace_seq, event): for fieldname in event.keys(): field = event[fieldname] --- >% --- Each field returned from the dict protocol is an instance of the next (and last) class: tracecmd.Field ---------------------- This is an instance of a field, including its data. It affords numerous use cases and is what you'll be using most. * If this is an integer field, i.e. 1, 2, 4 or 8 bytes long, you can convert it to the number contained, according to the file's endianness, by simply casting it to a long: field = event['myint'] value = long(field) * You can access the field's data, as field.data, and if the data is really a "__data_loc" type that will be resolved automatically. (If you don't know what this means, don't worry about it and just use field.data) This is it. It's pretty simple. 
A fully-featured plugin could look like this: --- %< --- def my_event_handler(trace_seq, event): trace_seq.puts("myev: %u", long(event['myfield'])) def register(pevent): pevent.register_event_handler("subsys", "event_name", my_event_handler) --- >% --- Tips and tricks ----------------- Be familiar with the struct module and use it, always checking endianness and potentially using pevent.file_endian. If you need access to pevent in your callbacks, simply pass it in yourself: --- %< --- def my_event_handler(pevent, trace_seq, event): pass def register(pevent): pevent.register_event_handler("subsys", "event_name", lambda *args: my_event_handler(pevent, *args) ) --- >% --- trace-cmd-v3.3.1/Documentation/asciidoc.conf000066400000000000000000000061741470231550600210140ustar00rootroot00000000000000## linktep: macro # # Usage: linktep:command[manpage-section] # # Note, {0} is the manpage section, while {target} is the command. # # Show TEP link as: (
); if section is defined, else just show # the command. [macros] (?su)[\\]?(?Plinktep):(?P\S*?)\[(?P.*?)\]= [attributes] asterisk=* plus=+ caret=^ startsb=[ endsb=] tilde=~ ifdef::backend-docbook[] [linktep-inlinemacro] {0%{target}} {0#} {0#{target}{0}} {0#} endif::backend-docbook[] ifdef::backend-docbook[] ifndef::tep-asciidoc-no-roff[] # "unbreak" docbook-xsl v1.68 for manpages. v1.69 works with or without this. # v1.72 breaks with this because it replaces dots not in roff requests. [listingblock] {title} ifdef::doctype-manpage[] .ft C endif::doctype-manpage[] | ifdef::doctype-manpage[] .ft endif::doctype-manpage[] {title#} endif::tep-asciidoc-no-roff[] ifdef::tep-asciidoc-no-roff[] ifdef::doctype-manpage[] # The following two small workarounds insert a simple paragraph after screen [listingblock] {title} | {title#} [verseblock] {title} {title%} {title#} | {title#} {title%} endif::doctype-manpage[] endif::tep-asciidoc-no-roff[] endif::backend-docbook[] ifdef::doctype-manpage[] ifdef::backend-docbook[] [header] template::[header-declarations] {mantitle} {manvolnum} libtracefs {libtracefs_version} libtracefs Manual {manname1} {manname2} {manname3} {manname4} {manname5} {manname6} {manname7} {manname8} {manname9} {manname10} {manname11} {manname12} {manname13} {manname14} {manname15} {manname16} {manname17} {manname18} {manname19} {manname20} {manname21} {manname22} {manname23} {manname24} {manname25} {manname26} {manname27} {manname28} {manname29} {manname30} {manpurpose} endif::backend-docbook[] endif::doctype-manpage[] ifdef::backend-xhtml11[] [linktep-inlinemacro] {target}{0?({0})} endif::backend-xhtml11[] trace-cmd-v3.3.1/Documentation/libtracecmd/000077500000000000000000000000001470231550600206305ustar00rootroot00000000000000trace-cmd-v3.3.1/Documentation/libtracecmd/Makefile000066400000000000000000000054001470231550600222670ustar00rootroot00000000000000# SPDX-License-Identifier: GPL-2.0 # Include the utils include $(src)/scripts/utils.mk # This Makefile and 
manpage XSL files were taken from libtracefs # and modified for libtracecmd MAN3_TXT= \ $(wildcard libtracecmd-*.txt) \ libtracecmd.txt MAN_TXT = $(MAN3_TXT) _MAN_XML=$(patsubst %.txt,%.xml,$(MAN_TXT)) _MAN_HTML=$(patsubst %.txt,%.html,$(MAN_TXT)) _DOC_MAN3=$(patsubst %.txt,%.m,$(MAN3_TXT)) MAN_XML=$(addprefix $(OUTPUT),$(_MAN_XML)) MAN_HTML=$(addprefix $(OUTPUT),$(_MAN_HTML)) DOC_MAN3=$(addprefix $(OUTPUT),$(_DOC_MAN3)) # Make the path relative to DESTDIR, not prefix ifndef DESTDIR prefix?=$(HOME) endif bindir?=$(prefix)/bin htmldir?=$(prefix)/share/doc/libtracecmd-doc pdfdir?=$(prefix)/share/doc/libtracecmd-doc mandir?=$(prefix)/share/man man3dir=$(mandir)/man3 ifdef USE_ASCIIDOCTOR ASCIIDOC_EXTRA += -a mansource="libtracecmd" -a manmanual="libtracecmd Manual" endif all: check-man-tools html man man: man3 man3: $(DOC_MAN3) html: $(MAN_HTML) $(MAN_HTML) $(DOC_MAN3): $(ASCIIDOC_CONF) install: check-man-tools install-man install-html check-man-tools: ifdef missing_tools $(error "You need to install $(missing_tools) for man pages") endif install-%.3: $(OUTPUT)%.3 $(Q)$(call do_install_docs,$<,$(man3dir),644); do-install-man: man $(addprefix install-,$(wildcard $(OUTPUT)*.3)) install-man: man $(Q)$(MAKE) -C . 
do-install-man install-%.txt: $(OUTPUT)%.html $(Q)$(call do_install_docs,$<,$(htmldir),644); do-install-html: html $(addprefix install-,$(wildcard *.txt)) install-html: html do-install-html uninstall: uninstall-man uninstall-html uninstall-man: $(Q)$(RM) $(addprefix $(DESTDIR)$(man3dir)/,$(DOC_MAN3)) uninstall-html: $(Q)$(RM) $(addprefix $(DESTDIR)$(htmldir)/,$(MAN_HTML)) ifdef missing_tools DO_INSTALL_MAN = $(warning Please install $(missing_tools) to have the man pages installed) else DO_INSTALL_MAN = do-install-man endif CLEAN_FILES = \ $(MAN_XML) $(addsuffix +,$(MAN_XML)) \ $(MAN_HTML) $(addsuffix +,$(MAN_HTML)) \ $(DOC_MAN3) *.3 *.m clean: $(Q) $(RM) $(CLEAN_FILES) ifdef USE_ASCIIDOCTOR $(OUTPUT)%.m : $(OUTPUT)%.txt $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ $(ASCIIDOC) -b manpage -d manpage \ $(ASCIIDOC_EXTRA) -alibtracecmd_version=$(LIBTRACECMD_VERSION) -o $@+ $< && \ mv $@+ $@ endif $(OUTPUT)%.m : $(OUTPUT)%.xml $(QUIET_XMLTO)$(RM) $@ && \ $(XMLTO) -o $(OUTPUT). -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $<; \ touch $@ $(OUTPUT)%.xml : %.txt $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ $(ASCIIDOC) -b docbook -d manpage \ $(ASCIIDOC_EXTRA) -alibtracecmd_version=$(LIBTRACECMD_VERSION) -o $@+ $< && \ mv $@+ $@ $(MAN_HTML): $(OUTPUT)%.html : %.txt $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ $(ASCIIDOC) -b $(ASCIIDOC_HTML) -d manpage \ $(ASCIIDOC_EXTRA) -alibtracecmd_version=$(LIBTRACECMD_VERSION) -o $@+ $< && \ mv $@+ $@ trace-cmd-v3.3.1/Documentation/libtracecmd/install-docs.sh.in000077500000000000000000000013501470231550600241670ustar00rootroot00000000000000#!/bin/bash # SPDX-License-Identifier: LGPL-2.1 # # Copyright (c) 2023 Daniel Wagner, SUSE LLC for section in 1 3 5; do while IFS= read -r -d '' man; do [ ! 
-d "${DESTDIR}@MANDIR@/man${section}" ] && install -d "${DESTDIR}@MANDIR@/man${section}" echo Installing "${man}" to "${DESTDIR}@MANDIR@/man${section}" install -m 0644 "${man}" "${DESTDIR}@MANDIR@/man${section}/" done< <(find "@SRCDIR@" -name "*\.${section}" -type f -print0) done while IFS= read -r -d '' html; do [ ! -d "${DESTDIR}@HTMLDIR@" ] && install -d "${DESTDIR}@HTMLDIR@" echo Installing "${html}" to "${DESTDIR}@HTMLDIR@" install -m 0644 "${html}" "${DESTDIR}@HTMLDIR@" done< <(find "@SRCDIR@" -name "*\.html" -type f -print0) trace-cmd-v3.3.1/Documentation/libtracecmd/libtracecmd-files.txt000066400000000000000000000142101470231550600247400ustar00rootroot00000000000000libtracecmd(3) ============= NAME ---- tracecmd_open, tracecmd_open_fd, tracecmd_open_head, tracecmd_init_data, tracecmd_close, tracecmd_set_private, tracecmd_get_private - Open and close a trace file. SYNOPSIS -------- [verse] -- *#include * struct tracecmd_input pass:[*]*tracecmd_open*(const char pass:[*]_file_, int _flags_); struct tracecmd_input pass:[*]*tracecmd_open_fd*(int _fd_, int _flags_); struct tracecmd_input pass:[*]*tracecmd_open_head*(const char pass:[*]_file_, int _flags_); int *tracecmd_init_data*(struct tracecmd_input pass:[*]_handle_); void *tracecmd_close*(struct tracecmd_input pass:[*]_handle_); void *tracecmd_set_private*(struct tracecmd_input pass:[*]_handle_, void pass:[*]_data_); void pass:[*]*tracecmd_get_private*(struct tracecmd_input pass:[*]_handle_); -- DESCRIPTION ----------- This set of APIs can be used to open and close a trace file recorded by *trace-cmd(1)* and containing tracing information from ftrace, the official Linux kernel tracer. The opened file is represented by a _tracecmd_input_ structure, all other library APIs that work with the file require a pointer to the structure. The APIs for opening a trace file have a _flag_ input parameter, which controls how the file will be opened and parsed. 
The _flag_ is a combination of these options: TRACECMD_FL_LOAD_NO_PLUGINS - Do not load any plugins TRACECMD_FL_LOAD_NO_SYSTEM_PLUGINS - Do not load system wide plugins, load only "local only" plugins from user's home directory. The *tracecmd_open()* function opens a given trace _file_, parses the metadata headers from the file, allocates and initializes a _tracecmd_input_ handler structure representing the file. It also initializes the handler for reading trace data from the file. The returned handler is ready to be used with _tracecmd_read__ APIs. The *tracecmd_open_fd()* function does the same as *tracecmd_open()*, but works with a file descriptor to a trace file, opened for reading. The *tracecmd_open_head()* function is the same as *tracecmd_open()*, but does not initialize the handler for reading trace data. It reads and parses the metadata headers only. The *tracecmd_init_data()* should be used before using the _tracecmd_read__ APIs. The *tracecmd_init_data()* function initializes a _handle_, allocated with *tracecmd_open_head()*, for reading trace data from the file associated with it. This API must be called before any of the _tracecmd_read__ APIs. The *tracecmd_close()* function frees a _handle_, pointer to tracecmd_input structure, previously allocated with *tracecmd_open()*, *tracecmd_open_fd()* or *tracecmd_open_head()* APIs. The *tracecmd_set_private()* function allows adding specific _data_ to the _handle_ that can be retrieved later. The *tracecmd_get_private()* function will retrieve the _data_ set by *tracecmd_set_private()* for the given _handle_. RETURN VALUE ------------ The *tracecmd_open()*, *tracecmd_open_fd()* and *tracecmd_open_head()* functions return a pointer to tracecmd_input structure or NULL in case of an error. The returned structure must be freed with *tracecmd_close()*. Note that if *tracecmd_open_fd()* is used to allocate a tracecmd_input handler, when *tracecmd_close()* is called to close it, that fd will be closed also.
The *tracecmd_init_data()* function returns -1 in case of an error or 0 otherwise. The *tracecmd_get_private()* returns the _data_ set by *tracecmd_set_private()*. EXAMPLE ------- [source,c] -- There are two different use patterns for opening and reading trace data from a trace file, which can be used depending on the use case. 1. Open and initialise the trace file in a single step: #include #include static int print_events(struct tracecmd_input *handle, struct tep_record *record, int cpu, void *data) { static struct trace_seq seq; struct tep_handle *tep = tracecmd_get_tep(handle); const char *file = tracecmd_get_private(handle); if (!seq.buffer) trace_seq_init(&seq); trace_seq_reset(&seq); trace_seq_printf(&seq, "%s: ", file); tep_print_event(tep, &seq, record, "%6.1000d [%03d] %s-%d %s: %s\n", TEP_PRINT_TIME, TEP_PRINT_CPU, TEP_PRINT_COMM, TEP_PRINT_PID, TEP_PRINT_NAME, TEP_PRINT_INFO); trace_seq_terminate(&seq); trace_seq_do_printf(&seq); return 0; } int main(int argc, char **argv) { struct tracecmd_input *handle; if (argc < 2) { printf("usage: %s trace.dat\n", argv[0]); exit(-1); } handle = tracecmd_open(argv[1], 0); if (!handle) exit(-1); tracecmd_set_private(handle, argv[1]); tracecmd_iterate_events(handle, NULL, 0, print_events, NULL); tracecmd_close(handle); } 2. Open and initialise the trace file in two steps. This allows to perform some processing based on metadata, read from the file, before initialising the trace data for reading. Example for such use case is when opening multiple trace files recorded in the same trace session. In that case timestamps of all trace events must be adjusted based on the information from the file's metadata and before reading the trace data. #include ... struct tracecmd_input *handle = tracecmd_open_head("trace.dat", 0); if (!handle) { /* Failed to open trace.dat file */ } ... /* do some processing, before initialising the trace data for reading */ ...
if (tracecmd_init_data(handle) < 0) { /* Failed to initialize hadle for reading the trace data */ } ... /* Read tracing data from the file, using the handle */ ... tracecmd_close(handle); ... -- FILES ----- [verse] -- *trace-cmd.h* Header file to include in order to have access to the library APIs. *-ltracecmd* Linker switch to add when building a program that uses the library. -- SEE ALSO -------- *libtracefs(3)*, *libtraceevent(3)*, *trace-cmd(1)* *trace-cmd.dat(5)* AUTHOR ------ [verse] -- *Steven Rostedt* *Tzvetomir Stoyanov* -- REPORTING BUGS -------------- Report bugs to LICENSE ------- libtracecmd is Free Software licensed under the GNU LGPL 2.1 RESOURCES --------- https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ COPYING ------- Copyright \(C) 2020 VMware, Inc. Free use of this software is granted under the terms of the GNU Public License (GPL). trace-cmd-v3.3.1/Documentation/libtracecmd/libtracecmd-instances.txt000066400000000000000000000065741470231550600256430ustar00rootroot00000000000000libtracecmd(3) ============= NAME ---- tracecmd_buffer_instances, tracecmd_buffer_instance_name, tracecmd_buffer_instance_handle - Read tracing instances from a trace file. SYNOPSIS -------- [verse] -- *#include * int *tracecmd_buffer_instances*(struct tracecmd_input pass:[*]_handle_); const char pass:[*]*tracecmd_buffer_instance_name*(struct tracecmd_input pass:[*]_handle_, int _indx_); struct tracecmd_input pass:[*]*tracecmd_buffer_instance_handle*(struct tracecmd_input pass:[*]_handle_, int _indx_); -- DESCRIPTION ----------- This set of APIs can be used to get information and read tracing data from tracing instances stored in a trace file. The *tracecmd_buffer_instances()* function gets the number of tracing instances recorded in a trace file. The top instance is not counted. The _handle_ is a tracecmd_input handler returned by *tracecmd_open_head()*. 
The *tracecmd_buffer_instance_name()* function gets the name of the tracing instance with given index _indx_, recorded in a trace file. The _indx_ is a number in the interval [0 .. count-1], where count is the number returned by *tracecmd_buffer_instances()*. The _handle_ is a tracecmd_input handler returned by *tracecmd_open_head()*. The *tracecmd_buffer_instance_handle()* allocates and initializes a tracecmd_input handle, associated with trace instance with index _indx_ from a trace file. The _handle_ is a tracecmd_input handler returned by *tracecmd_open_head()*. The _indx_ is a number in the interval [0 .. count-1], where count is the number returned by *tracecmd_buffer_instances()*. RETURN VALUE ------------ The *tracecmd_buffer_instances()* function returns the number of tracing instances recorded in a trace file. The *tracecmd_buffer_instance_name()* function returns a string, the name of a tracing instance, or NULL in case of an error. The string must *not* be freed. The *tracecmd_buffer_instance_handle()* function returns a pointer to newly allocated tracecmd_input handler or NULL in case of an error. The returned handler must be closed by *tracecmd_close(3)*. EXAMPLE ------- [source,c] -- #include ... struct tracecmd_input *handle = tracecmd_open_head("trace.dat", 0); if (!handle) { /* Failed to open trace.dat file */ } ... int num = tracecmd_buffer_instances(handle); while (num-- > 0) { struct tracecmd_input *h; const char *name; name = tracecmd_buffer_instance_name(handle, num); if (!name) { /* Failed to get name of instance num */ } h = tracecmd_buffer_instance_handle(handle, num); if (!h) { /* Failed to initialize handler for instance num */ } ... tracecmd_close(h); } ... tracecmd_close(handle); -- FILES ----- [verse] -- *trace-cmd.h* Header file to include in order to have access to the library APIs. *-ltracecmd* Linker switch to add when building a program that uses the library.
-- SEE ALSO -------- *libtracefs(3)*, *libtraceevent(3)*, *trace-cmd(1)* *trace-cmd.dat(5)* AUTHOR ------ [verse] -- *Steven Rostedt* *Tzvetomir Stoyanov* -- REPORTING BUGS -------------- Report bugs to LICENSE ------- libtracecmd is Free Software licensed under the GNU LGPL 2.1 RESOURCES --------- https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ COPYING ------- Copyright \(C) 2020 VMware, Inc. Free use of this software is granted under the terms of the GNU Public License (GPL). trace-cmd-v3.3.1/Documentation/libtracecmd/libtracecmd-iterate.txt000066400000000000000000000322701470231550600253010ustar00rootroot00000000000000libtracecmd(3) ============= NAME ---- tracecmd_iterate_events, tracecmd_iterate_events_multi, tracecmd_follow_event, tracecmd_follow_missed_events, tracecmd_filter_add, tracecmd_iterate_reset - Read events from a trace file SYNOPSIS -------- [verse] -- *#include * int *tracecmd_iterate_events*(struct tracecmd_input pass:[*]_handle_, cpu_set_t pass:[*]_cpus_, int _cpu_size_, int (pass:[*]_callback_)(struct tracecmd_input pass:[*], struct tep_record pass:[*], int, void pass:[*]), void pass:[*]_callback_data_); int *tracecmd_iterate_events_multi*(struct tracecmd_input pass:[**]_handles_, int _nr_handles_, int (pass:[*]_callback_)(struct tracecmd_input pass:[*], struct tep_record pass:[*], int, void pass:[*]), void pass:[*]_callback_data_); int *tracecmd_iterate_events_reverse*(struct tracecmd_input pass:[*]_handle_, cpu_set_t pass:[*]_cpus_, int _cpu_size_, int (pass:[*]_callback_)(struct tracecmd_input pass:[*], struct tep_record pass:[*], int, void pass:[*]), void pass:[*]_callback_data_, bool _cont_); int *tracecmd_follow_event*(struct tracecmd_input pass:[*]_handle_, const char pass:[*]_system_, const char pass:[*]_event_name_, int (pass:[*]_callback_)(struct tracecmd_input pass:[*], struct tep_event pass:[*], struct tep_record pass:[*], int, void pass:[*]), void pass:[*]_callback_data_); int *tracecmd_follow_missed_events*(struct 
tracecmd_input pass:[*]_handle_, int (pass:[*]_callback_)(struct tracecmd_input pass:[*], struct tep_event pass:[*], struct tep_record pass:[*], int, void pass:[*]), void pass:[*]_callback_data_); struct tracecmd_filter pass:[*]*tracecmd_filter_add*(struct tracecmd_input *_handle_, const char pass:[*]_filter_str_, bool _neg_); int *tracecmd_iterate_reset*(struct tracecmd_input pass:[*]_handle_); -- DESCRIPTION ----------- This set of APIs can be used to iterate over events after opening a trace file using one of the open functions like *tracecmd_open(3)* or *tracecmd_open_fd(3)*. The function *tracecmd_iterate_events()* will iterate through all the events in the trace file defined by _handle_, where _handle_ is returned from one of the *tracecmd_open(3)* functions. It will call the _callback_() function on the events on the CPUs defined by _cpus_. The _cpu_size_ must be the size of _cpus_ (see *CPU_SET(3)*). If _cpus_ is NULL, then _cpu_size_ is ignored and _callback()_ will be called for all events on all CPUs in the trace file. The _callback_data_ is passed to the _callback()_ as its last parameter. _callback_ may be NULL, which is useful if *tracecmd_follow_event()* is used, but note if _callback_ is NULL, then _callback_data_ is ignored and not sent to the _callback_ of *tracecmd_follow_event()*. The function *tracecmd_iterate_events_multi()* is similar to *tracecmd_iterate_events()* except that it allows to iterate over more than one trace file. If *tracecmd agent(1)* is used to get a trace file for both the host and guest, make sure that the host trace file is the first entry in _handles_ and *tracecmd_iterate_events_multi()* will do the synchronization of the meta data for the guest files that come later in _handles_. _handles_ is an array of trace file descriptors that were opened by *tracecmd_open(3)* and friends. 
Note, unlike *tracecmd_iterate_events()*, *tracecmd_iterate_events_multi()* does not filter on CPUs, as it will cause the API to become too complex in knowing which handle to filter the CPUs on. If CPU filtering is desired, then the _callback_ should check the _record_->cpu and return 0 if it is not the desired CPU to process. _nr_handles_ denotes the number of elements in _handles_. The _callback_data_ is passed to the _callback_ as its last parameter. _callback_ may be NULL, which is useful if *tracecmd_follow_event()* is used, but note if _callback_ is NULL, then _callback_data_ is ignored and not sent to the _callback_ of *tracecmd_follow_event()*. The function *tracecmd_iterate_events_reverse()* works pretty much the same way as *tracecmd_iterate_events()* works, but instead of calling the _callback_() function for each event in order of the timestamp, it will call the _callback_() function for each event in reverse order of the timestamp. If _cont_ is false, it will start by calling the event with the oldest timestamp in the trace.dat file. If _cont_ is set to true, then it will start wherever the current position of the tracing data is. For instance, if the _callback()_ returns something other than zero it will exit the iteration. If *tracecmd_iterate_events_reverse()* is called again with _cont_ set to true it will continue where it left off. If _cont_ is false, it will start again at the event with the oldest timestamp. The _handle_, _cpus_, _cpu_size_, and _callback_data_ act the same as in *tracecmd_iterate_events()*.
The _callback()_ for *tracecmd_iterate_events()*, *tracecmd_iterate_events_reverse()* and *tracecmd_iterate_events_multi()* is of the prototype: int _callback()_(struct tracecmd_input pass:[*]_handle_, struct tep_record pass:[*]_record_, int _cpu_, void pass:[*]_data_); The _handle_ is the same _handle_ passed to *tracecmd_iterate_events()* or the current handle of _handles_ passed to *tracecmd_iterate_events_multi()* that the _record_ belongs to. The _record_ is the current event record. The _cpu_ is the current CPU being processed. Note, for *tracecmd_iterate_events_multi()* it may not be the actual CPU of the file, but the nth CPU of all the _handles_ put together. Use _record_->cpu to get the actual CPU that the event is on. The *tracecmd_follow_event()* function will attach to a trace file descriptor _handle_ and call the _callback_ when the event described by _system_ and _event_name_ matches an event in the iteration of *tracecmd_iterate_events()* or *tracecmd_iterate_events_multi()*. Note, the _cpu_ is the nth CPU for both *tracecmd_iterate_events()* and *tracecmd_iterate_events_multi()*. If the actual CPU of the _record_ is needed, use _record_->cpu. For *tracecmd_iterate_events_multi()*, the _callback_ is only called if the _handle_ matches the current trace file descriptor within _handles_. The _callback_data_ is passed as the last parameter to the _callback()_ function. Note, this _callback()_ function will be called before the _callback()_ function of either *tracecmd_iterate_events()* or *tracecmd_iterate_events_multi()*. The _callback()_ prototype for *tracecmd_follow_event()* is: int _callback()_(struct tracecmd_input pass:[*]_handle_, struct tep_event pass:[*]_event_, struct tep_record pass:[*]_record_, int _cpu_, void pass:[*]_data_); The *tracecmd_follow_missed_events()* function will attach to a trace file descriptor _handle_ and call the _callback_ when missed events are detected. The _event_ will hold the type of event that the _record_ is.
The _record_ will hold the information of the missed events. The _cpu_ is the nth CPU for both *tracecmd_iterate_events()* and *tracecmd_iterate_events_multi()*. If the CPU that the missed events are for is needed, use _record_->cpu. If _record_->missed_events is a positive number, then it holds the number of missed events since the last event on its CPU, otherwise it will be negative, and that will mean that the number of missed events is unknown but missed events exist since the last event on the CPU. The _callback_ and _callback_data_ have the same format as for *tracecmd_follow_event()* above. The missed events _callback_ is called before any of the other _callbacks_ and any filters that were added by *tracecmd_filter_add()* are ignored. If _callback_ returns a non-zero value, it will stop the iterator before it calls any of the other iterator callbacks for the given record. The *tracecmd_filter_add()* function adds a filter to _handle_ that affects both *tracecmd_iterate_events()* and *tracecmd_iterate_events_multi()*. The _filter_str_ is a character string defining a filter in a format that is defined by *tep_filter_add_filter_str(3)*. If _neg_ is true, then the events that match the filter will be skipped, otherwise the events that match will execute the _callback()_ function in the iterators. The *tracecmd_iterate_reset()* sets the _handle_ back to start at the beginning, so that the next call to *tracecmd_iterate_events()* starts back at the first event again, instead of continuing where it left off. RETURN VALUE ------------ *tracecmd_iterate_events()*, *tracecmd_iterate_events_reverse()* and *tracecmd_iterate_events_multi()* return zero if they successfully iterated all events (handling the follow and filters appropriately), or an error value, which can include returning a non-zero result from the _callback()_ function. *tracecmd_iterate_reset()* returns 0 on success and -1 if an error occurred. Note, if -1 is returned, a partial reset may have also happened.
EXAMPLE ------- [source,c] -- #define _GNU_SOURCE #include #include #include #include struct private_data { int cpu; const char *file; }; static int print_events(struct tracecmd_input *handle, struct tep_record *record, int cpu, void *data) { static struct trace_seq seq; struct tep_handle *tep = tracecmd_get_tep(handle); struct private_data *pdata = tracecmd_get_private(handle); /* For multi handles we need this */ if (pdata->cpu >= 0 && pdata->cpu != record->cpu) return 0; if (!seq.buffer) trace_seq_init(&seq); trace_seq_reset(&seq); trace_seq_printf(&seq, "%s: ", pdata->file); tep_print_event(tep, &seq, record, "%6.1000d [%03d] %s-%d %s: %s\n", TEP_PRINT_TIME, TEP_PRINT_CPU, TEP_PRINT_COMM, TEP_PRINT_PID, TEP_PRINT_NAME, TEP_PRINT_INFO); trace_seq_terminate(&seq); trace_seq_do_printf(&seq); return 0; } static int print_event(struct tracecmd_input *handle, struct tep_event *event, struct tep_record *record, int cpu, void *data) { return print_events(handle, record, cpu, data); } static int missed_events(struct tracecmd_input *handle, struct tep_event *event, struct tep_record *record, int cpu, void *data) { if (record->missed_events > 0) printf("CPU [%03d] has %d missed events\n", record->cpu, record->missed_events); else printf("CPU [%03d] has missed events\n", record->cpu); return 0; } static void usage(const char *argv0) { printf("usage: [-c cpu][-f filter][-e event] %s trace.dat [trace.dat ...]\n", argv0); exit(-1); } int main(int argc, char **argv) { struct tracecmd_input **handles = NULL; const char *filter_str = NULL; const char *argv0 = argv[0]; struct private_data *priv; cpu_set_t *cpuset = NULL; char *event = NULL; size_t cpusize = 0; int nr_handles = 0; int cpu = -1; int i; int c; while ((c = getopt(argc, argv, "c:f:e:")) >= 0) { switch (c) { case 'c': /* filter all trace data to this one CPU. 
*/ cpu = atoi(optarg); break; case 'f': filter_str = optarg; break; case 'e': event = optarg; break; default: usage(argv0); } } argc -= optind; argv += optind; if (argc == 0) usage(argv0); for (i = 0; i < argc; i++) { handles = realloc(handles, sizeof(*handles) * (nr_handles + 1)); if (!handles) exit(-1); handles[nr_handles] = tracecmd_open(argv[i], 0); if (!handles[nr_handles]) { perror(argv[i]); exit(-1); } if (filter_str) { if (tracecmd_filter_add(handles[nr_handles], filter_str, false) == NULL) { perror("adding filter"); exit(-1); } } priv = calloc(1, sizeof(*priv)); if (!priv) exit(-1); priv->file = argv[i]; priv->cpu = cpu; tracecmd_set_private(handles[nr_handles], priv); if (event) { if (tracecmd_follow_event(handles[nr_handles], NULL, event, print_event, NULL) < 0) { printf("Could not follow event %s for file %s\n", event, argv[i]); exit(-1); } } tracecmd_follow_missed_events(handles[nr_handles], missed_events, NULL); nr_handles++; } /* Shortcut */ if (nr_handles == 1) { if (cpu >= 0) { cpuset = CPU_ALLOC(cpu + 1); if (!cpuset) exit(-1); cpusize = CPU_ALLOC_SIZE(cpu + 1); CPU_SET_S(cpu, cpusize, cpuset); } if (event) tracecmd_iterate_events(handles[0], cpuset, cpusize, NULL, NULL); else tracecmd_iterate_events(handles[0], cpuset, cpusize, print_events, NULL); } else { if (event) tracecmd_iterate_events_multi(handles, nr_handles, NULL, NULL); else tracecmd_iterate_events_multi(handles, nr_handles, print_events, NULL); } for (i = 0; i < nr_handles; i++) { priv = tracecmd_get_private(handles[i]); free(priv); tracecmd_close(handles[i]); } free(handles); } -- FILES ----- [verse] -- *trace-cmd.h* Header file to include in order to have access to the library APIs. *-ltracecmd* Linker switch to add when building a program that uses the library. 
-- SEE ALSO -------- *libtracefs(3)*, *libtraceevent(3)*, *trace-cmd(1)* *trace-cmd.dat(5)* AUTHOR ------ [verse] -- *Steven Rostedt* *Tzvetomir Stoyanov* -- REPORTING BUGS -------------- Report bugs to LICENSE ------- libtracecmd is Free Software licensed under the GNU LGPL 2.1 RESOURCES --------- https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ COPYING ------- Copyright \(C) 2020 VMware, Inc. Free use of this software is granted under the terms of the GNU Public License (GPL). trace-cmd-v3.3.1/Documentation/libtracecmd/libtracecmd-log.txt000066400000000000000000000033621470231550600244250ustar00rootroot00000000000000libtracecmd(3) ============= NAME ---- tracecmd_set_loglevel - Set log level of the library SYNOPSIS -------- [verse] -- *#include * int *tracecmd_set_loglevel*(enum tep_loglevel _level_); -- DESCRIPTION ----------- The *tracecmd_set_loglevel()* function sets the level of the library logs that will be printed on the console. See *libtraceevent(3)* for detailed desciription of the log levels. Setting the log level to specific value means that logs from the previous levels will be printed too. For example _TEP_LOG_WARNING_ will print any logs with severity _TEP_LOG_WARNING_, _TEP_LOG_ERROR_ and _TEP_LOG_CRITICAL_. The default log level is _TEP_LOG_CRITICAL_. When a new level is set, it is also propagated to the libtracefs and libtraceevent. EXAMPLE ------- [source,c] -- #include ... tracecmd_set_loglevel(TEP_LOG_ALL); ... /* call libtracecmd, libtracefs or libtraceevent APIs and observe any logs they produce */ ... tracecmd_set_loglevel(TEP_LOG_CRITICAL); -- FILES ----- [verse] -- *trace-cmd.h* Header file to include in order to have access to the library APIs. *-ltracecmd* Linker switch to add when building a program that uses the library. 
-- SEE ALSO -------- *libtracefs(3)*, *libtraceevent(3)*, *trace-cmd(1)* *trace-cmd.dat(5)* AUTHOR ------ [verse] -- *Steven Rostedt* *Tzvetomir Stoyanov* -- REPORTING BUGS -------------- Report bugs to LICENSE ------- libtracecmd is Free Software licensed under the GNU LGPL 2.1 RESOURCES --------- https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ COPYING ------- Copyright \(C) 2021 VMware, Inc. Free use of this software is granted under the terms of the GNU Public License (GPL). trace-cmd-v3.3.1/Documentation/libtracecmd/libtracecmd-maps.txt000066400000000000000000000133471470231550600246100ustar00rootroot00000000000000libtracecmd(3) ============= NAME ---- tracecmd_map_vcpus, tracecmd_get_cpu_map, tracecmd_map_find_by_host_pid, tracecmd_map_get_host_pid, tracecmd_map_get_guest, tracecmd_map_set_private, tracecmd_map_get_private - Mapping host and guest data SYNOPSIS -------- [verse] -- *#include * int *tracecmd_map_vcpus*(struct tracecmd_input pass:[**]handles, int nr_handles); struct tracecmd_cpu_map pass:[*]*tracecmd_get_cpu_map*(struct tracecmd_input pass:[*]handle, int cpu); struct tracecmd_cpu_map pass:[*]*tracecmd_map_find_by_host_pid*(struct tracecmd_input pass:[*]handle, int host_pid); int *tracecmd_map_get_host_pid*(struct tracecmd_cpu_map pass:[*]map); struct tracecmd_input pass:[*]*tracecmd_map_get_guest*(struct tracecmd_cpu_map pass:[*]map); void *tracecmd_map_set_private*(struct tracecmd_cpu_map pass:[*]map, void pass:[*]priv); void pass:[*]*tracecmd_map_get_private*(struct tracecmd_cpu_map pass:[*]map); -- DESCRIPTION ----------- This set of APIs is used to map host and guest trace files for to facilitate further tracing analysis. The *tracecmd_map_vcpus()* takes an array of _handles_ where each item in that array was created by one of the *tracecmd_open(3)* functions, and the number of handles as _nr_handles_. 
The first handle in the array of _handles_ is expected to be the descriptor for the host tracing file, and the rest are guest trace files that run on the host, and were created by the *trace-cmd record(1)* and *trace-cmd agent(1)* interactions. It returns the number of guests found in _handles_ that were associated with the host, or negative on error. The *tracecmd_get_cpu_map()* returns a descriptor for a given CPU for a handle. If the _handle_ was a guest defined from *tracecmd_map_vcpus()* then it returns the mapping created from that function that is associated with this particular vCPU (denoted by _cpu_) from _handle_. This descriptor can be used by *tracecmd_map_get_guest()*, *tracecmd_map_set_private()* and *tracecmd_map_get_private()* functions. The *tracecmd_map_find_by_host_pid()* will return a mapping for a guest virtual CPU that is handled by the given _host_pid_. Note, the _handle_ passed in can be either the host handle or one of the guest's handles for that host that was mapped by *tracecmd_map_vcpus()*, even if the guest handle does not have the vCPU that the _host_pid_ represents. The *tracecmd_map_get_host_pid()* will return the host_pid for a given _map_ that was retrieved by one of the above functions. The *tracecmd_map_get_guest()* will return the guest_handle for a given _map_ that was retrieved by one of the above functions. The *tracecmd_map_set_private()* allows the application to assign private data for a given guest vCPU to host thread mapping defined by _map_. The *tracecmd_map_get_private()* retrieves the _priv_ data from _map_ that was set by *tracecmd_map_set_private()*. RETURN VALUE ------------ *tracecmd_map_vcpus()* returns the number of guests in the _handles_ array that were mapped to the host handle that is the first entry in _handles_. It returns -1 on error. *tracecmd_get_cpu_map()* returns a map created by *tracecmd_map_vcpus()* for a given _cpu_ for a given _handle_, or NULL if it is not found.
*tracecmd_map_find_by_host_pid()* returns a map that is associated with the host task with _host_pid_ as its process ID. _handle_ can be either the host handle, or one of the guest handles that were mapped to the host via *tracecmd_map_vcpus()*, even if the guest handle is another guest than the one that the mapping is for. It returns NULL if not found. *tracecmd_map_get_host_pid()* returns the host process ID for an associated mapping defined by _map_. *tracecmd_map_get_guest()* returns the guest handle for an associated mapping defined by _map_. *tracecmd_map_get_private()* returns the private data of a mapping defined by _map_ that was set by *tracecmd_map_set_private()*. EXAMPLE ------- [source,c] -- #include <stdio.h> #include <stdlib.h> #include <trace-cmd.h> int main(int argc, char **argv) { struct tracecmd_input **handles = NULL; int nr_handles = 0; int i; if (argc < 2) { printf("usage: host_trace.dat guest1_trace.dat [guest2_trace.dat ...]\n"); exit(-1); } for (i = 1; i < argc; i++) { handles = realloc(handles, sizeof(*handles) * (nr_handles + 1)); if (!handles) exit(-1); handles[nr_handles] = tracecmd_open(argv[i], 0); if (!handles[nr_handles]) { perror(argv[i]); exit(-1); } tracecmd_set_private(handles[nr_handles], argv[i]); nr_handles++; } tracecmd_map_vcpus(handles, nr_handles); for (i = 1; i < nr_handles; i++) { struct tracecmd_cpu_map *map; struct tep_handle *tep; const char *file = tracecmd_get_private(handles[i]); int cpus, cpu; printf("Mappings for guest %s:\n", file); tep = tracecmd_get_tep(handles[i]); cpus = tep_get_cpus(tep); for (cpu = 0; cpu < cpus; cpu++) { printf(" [%03d] ", cpu); map = tracecmd_get_cpu_map(handles[i], cpu); if (!map) { printf("Has no mapping!\n"); continue; } printf("host_pid: %d\n", tracecmd_map_get_host_pid(map)); } } for (i = 0; i < nr_handles; i++) tracecmd_close(handles[i]); free(handles); exit(0); } -- FILES ----- [verse] -- *trace-cmd.h* Header file to include in order to have access to the library APIs.
*-ltracecmd* Linker switch to add when building a program that uses the library. -- SEE ALSO -------- *libtracefs(3)*, *libtraceevent(3)*, *trace-cmd(1)* *trace-cmd.dat(5)* REPORTING BUGS -------------- Report bugs to LICENSE ------- libtracecmd is Free Software licensed under the GNU LGPL 2.1 RESOURCES --------- https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ COPYING ------- Copyright \(C) 2020 VMware, Inc. Free use of this software is granted under the terms of the GNU Public License (GPL). trace-cmd-v3.3.1/Documentation/libtracecmd/libtracecmd-peer.txt000066400000000000000000000104751470231550600246020ustar00rootroot00000000000000libtracecmd(3) ============= NAME ---- tracecmd_get_traceid, tracecmd_get_guest_cpumap - Manage trace session with multiple trace peers, recorded in multiple trace files. SYNOPSIS -------- [verse] -- *#include * unsigned long long *tracecmd_get_traceid*(struct tracecmd_input pass:[*]_handle_); int *tracecmd_get_guest_cpumap*(struct tracecmd_input pass:[*]_handle_, unsigned long long _trace_id_, const char pass:[*]pass:[*]_name_, int pass:[*]_vcpu_count_, const int pass:[*]pass:[*]_cpu_pid_); -- DESCRIPTION ----------- This set of APIs can be used to manage a trace session with multiple trace peers, for example, tracing both a host and one or more guest virtual machines. The trace data of each peer from the session is recorded in separate trace files. Information about peers from the session is stored in the metadata of each trace file. These APIs use that information to extract and synchronize the trace data. The *tracecmd_get_traceid()* function returns the trace ID stored in the trace file metadata associated with _handle_. Each peer from a trace session has an ID unique for that peer and that trace session only. This ID is used to match multiple trace files recorded in a same trace session. 
The *tracecmd_get_guest_cpumap()* function gets the mapping of guest virtual CPUs (VCPU) to the host process that represents those VCPUs and is stored in the metadata of the trace file associated with _handle_. This information is gathered during a host-guest trace session and is stored in the host trace file. The _trace_id_ parameter is the trace ID of the guest in this particular trace session. If a guest with that ID was part of that session, its VCPU to host process mapping is in the host trace file and the information is returned in _name_, _vcpu_count_ and _cpu_pid_ parameters. The _name_ parameter contains the name of the guest, the _vcpu_count_ contains the count of VCPUs of that guest and the _cpu_pid_ array contains the VCPU to host process mapping. The array is of size _vcpu_count_ where the index is VCPU and the value is the process ID (PID) of the host process, running that VCPU. The _name_, _vcpu_count_ and _cpu_pid_ values must *not* be freed. RETURN VALUE ------------ The *tracecmd_get_traceid()* function returns a 64 bit trace ID. The *tracecmd_get_guest_cpumap()* function returns -1 in case of an error or 0 otherwise. If 0 is returned, then the _name_, _vcpu_count_ and _cpu_pid_ parameters contain the requested information. EXAMPLE ------- [source,c] -- #include ... 
struct tracecmd_input *host = tracecmd_open("trace.dat", 0); if (!host) { /* Failed to open host trace file */ } struct tracecmd_input *guest1 = tracecmd_open_head("trace-Guest1.dat", 0); if (!guest1) { /* Failed to open guest1 trace file */ } struct tracecmd_input *guest2 = tracecmd_open_head("trace-Guest2.dat", 0); if (!guest2) { /* Failed to open guest2 trace file */ } unsigned long long guest_id_1 = tracecmd_get_traceid(guest1); unsigned long long guest_id_2 = tracecmd_get_traceid(guest2); int *cpu_pid_1, *cpu_pid_2; int vcount_1, vcount_2; char *name_1, *name_2; if (!tracecmd_get_guest_cpumap(host, guest_id_1, &name_1, &vcount_1, &cpu_pid_1)) { /* The Host and a guest1 with name_1 are part of the same trace session. * Got guest1 VCPU to host PID mapping. */ } if (!tracecmd_get_guest_cpumap(host, guest_id_2, &name_2, &vcount_2, &cpu_pid_2)) { /* The Host and a guest2 with name_2 are part of the same trace session. * Got guest2 VCPU to host PID mapping. */ } ... tracecmd_close(guest1); tracecmd_close(guest2); tracecmd_close(host); -- FILES ----- [verse] -- *trace-cmd.h* Header file to include in order to have access to the library APIs. *-ltracecmd* Linker switch to add when building a program that uses the library. -- SEE ALSO -------- *libtracefs(3)*, *libtraceevent(3)*, *trace-cmd(1)* *trace-cmd.dat(5)* AUTHOR ------ [verse] -- *Steven Rostedt* *Tzvetomir Stoyanov* -- REPORTING BUGS -------------- Report bugs to LICENSE ------- libtracecmd is Free Software licensed under the GNU LGPL 2.1 RESOURCES --------- https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ COPYING ------- Copyright \(C) 2020 VMware, Inc. Free use of this software is granted under the terms of the GNU Public License (GPL).
trace-cmd-v3.3.1/Documentation/libtracecmd/libtracecmd-record.txt000066400000000000000000000074011470231550600251200ustar00rootroot00000000000000libtracecmd(3) ============= NAME ---- tracecmd_read_cpu_first, tracecmd_read_data, tracecmd_read_at, tracecmd_free_record, tracecmd_get_tep - Read recorded events from a trace file. SYNOPSIS -------- [verse] -- *#include * struct tep_record pass:[*]*tracecmd_read_cpu_first*(struct tracecmd_input pass:[*]_handle_, int _cpu_); struct tep_record pass:[*]*tracecmd_read_data*(struct tracecmd_input pass:[*]_handle_, int _cpu_); struct tep_record pass:[*]*tracecmd_read_at*(struct tracecmd_input pass:[*]_handle_, unsigned long long _offset_, int pass:[*]_cpu_); void *tracecmd_free_record*(struct tep_record pass:[*]_record_); struct tep_handle pass:[*]*tracecmd_get_tep*(struct tracecmd_input pass:[*]_handle_); -- DESCRIPTION ----------- This set of APIs can be used to read tracing data from a trace file opened with *tracecmd_open()(3)*, *tracecmd_open_fd()(3)* or *tracecmd_open_head()(3)*. The *tracecmd_read_cpu_first()* function reads the first trace record for a given _cpu_ from a trace file associated with _handle_. The returned record must be freed with *tracecmd_free_record()*. The *tracecmd_read_data()* function reads the next trace record for a given _cpu_ from a trace file associated with _handle_ and increments the read location pointer, so that the next call to *tracecmd_read_data()* will not read the same record again. The returned record must be freed with *tracecmd_free_record()*. The *tracecmd_read_at()* function reads a trace record from a specific _offset_ within the file associated with _handle_. The CPU on which the recorded event occurred is stored in the _cpu_. The function does not change the current read location pointer. The returned record must be freed with *tracecmd_free_record()*. The *tracecmd_free_record()* function frees a _record_ returned by any of the _tracecmd_read__ APIs. 
The *tracecmd_get_tep()* function returns a tep context for a given _handle_. RETURN VALUE ------------ The *tracecmd_read_cpu_first()*, *tracecmd_read_data()* and *tracecmd_read_at()* functions return a pointer to struct tep_record or NULL in case of an error. The returned record must be freed with *tracecmd_free_record()*. The *tracecmd_get_tep()* function returns a pointer to tep context or NULL if there is no tep context for the given _handle_. The returned tep pointer must *not* be freed. EXAMPLE ------- [source,c] -- #include <trace-cmd.h> ... struct tracecmd_input *handle = tracecmd_open("trace.dat", 0); if (!handle) { /* Failed to open trace.dat file */ } ... unsigned long long offset = 0; struct tep_record *rec; int cpu = 0; rec = tracecmd_read_cpu_first(handle, cpu); while (rec) { ... if ( /* some interesting record noticed */) { /* store the offset of the interesting record */ offset = rec->offset; } ... tracecmd_free_record(rec); rec = tracecmd_read_data(handle, cpu); } ... if (offset) { rec = tracecmd_read_at(handle, offset, &cpu); if (rec) { /* Got record at offset on cpu */ ... tracecmd_free_record(rec); } } ... tracecmd_close(handle); -- FILES ----- [verse] -- *trace-cmd.h* Header file to include in order to have access to the library APIs. *-ltracecmd* Linker switch to add when building a program that uses the library. -- SEE ALSO -------- *libtracefs(3)*, *libtraceevent(3)*, *trace-cmd(1)* *trace-cmd.dat(5)* AUTHOR ------ [verse] -- *Steven Rostedt* *Tzvetomir Stoyanov* -- REPORTING BUGS -------------- Report bugs to LICENSE ------- libtracecmd is Free Software licensed under the GNU LGPL 2.1 RESOURCES --------- https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ COPYING ------- Copyright \(C) 2020 VMware, Inc. Free use of this software is granted under the terms of the GNU Public License (GPL).
trace-cmd-v3.3.1/Documentation/libtracecmd/libtracecmd-timestamp.txt000066400000000000000000000110211470231550600256360ustar00rootroot00000000000000libtracecmd(3) ============= NAME ---- tracecmd_get_first_ts, tracecmd_add_ts_offset, tracecmd_get_tsc2nsec - Handle time stamps from a trace file. SYNOPSIS -------- [verse] -- *#include * unsigned long long *tracecmd_get_first_ts*(struct tracecmd_input pass:[*]_handle_); void *tracecmd_add_ts_offset*(struct tracecmd_input pass:[*]_handle_, long long _offset_); int *tracecmd_get_tsc2nsec*(struct tracecmd_input pass:[*]_handle_, int pass:[*]_mult_, int pass[*]_shift_, unsigned long long pass:[*]_offset_); -- DESCRIPTION ----------- This set of APIs can be used to read tracing data from a trace file opened with _tracecmd_open()(3)_, _tracecmd_open_fd()(3)_ or _tracecmd_open_head()(3)_. The *tracecmd_get_first_ts()* function returns the time stamp of the first record in the _handle_. The *tracecmd_add_ts_offset()* function adds an offset to each of the records in the _handle_ that represents a trace file. This is useful for associating two different tracing files by their offset (for example a trace file from a host and a trace file from a guest that were not synchronized when created). The *tracecmd_get_tsc2nsec* returns the calculation values that convert the raw timestamp into nanoseconds. The parameters are pointers to the storage to save the values, or NULL to ignore them. The multiplier will be saved in _mult_, the shift value will be saved in _shift_, and the offset value will be saved in _offset_, if the corresponding parameters are not NULL. RETURN VALUE ------------ The *tracecmd_get_first_ts()* function returns the timestamp of the first record in a trace file for the given _handle_. The *tracecmd_get_tsc2nsec*() returns 0 if the tracing clock supports the shift values and -1 otherwise. 
Note, that if the trace file has the TSC2NSEC option, the values returned in the parameters may still be valid even if the function itself returns -1. The return value only notes if the values will be used in the calculations of the given clock. EXAMPLE ------- [source,c] -- #include #include static int print_events(struct tracecmd_input *handle, struct tep_record *record, int cpu, void *data) { static struct trace_seq seq; struct tep_handle *tep = tracecmd_get_tep(handle); const char *file = tracecmd_get_private(handle); if (!seq.buffer) trace_seq_init(&seq); trace_seq_reset(&seq); trace_seq_printf(&seq, "%s: ", file); tep_print_event(tep, &seq, record, "%6.1000d [%03d] %s-%d %s: %s\n", TEP_PRINT_TIME, TEP_PRINT_CPU, TEP_PRINT_COMM, TEP_PRINT_PID, TEP_PRINT_NAME, TEP_PRINT_INFO); trace_seq_terminate(&seq); trace_seq_do_printf(&seq); return 0; } int main(int argc, char **argv) { struct tracecmd_input **handles = NULL; unsigned long long ts, first_ts = 0; unsigned long long offset; int multi; int shift; int nr_handles = 0; int ret; int i; if (argc < 2) { printf("usage: %s trace.dat [trace.dat ...]\n", argv[0]); exit(-1); } for (i = 1; i < argc; i++) { handles = realloc(handles, sizeof(*handles) * (nr_handles + 1)); if (!handles) exit(-1); handles[nr_handles] = tracecmd_open(argv[i], 0); if (!handles[nr_handles]) exit(-1); ret = tracecmd_get_tsc2nsec(handles[nr_handles], &multi, &shift, &offset); if (!ret) printf(" %s has tsc2nsec calculations of mult:%d shift:%d offset:%lld\n", argv[i], multi, shift, offset); tracecmd_set_private(handles[nr_handles], argv[i]); ts = tracecmd_get_first_ts(handles[nr_handles]); if (!first_ts || ts < first_ts) first_ts = ts; nr_handles++; } /* Set the time stamp to start at the first record found */ for (i = 0; i < nr_handles; i++) tracecmd_add_ts_offset(handles[i], -first_ts); tracecmd_iterate_events_multi(handles, nr_handles, print_events, NULL); for (i = 0; i < nr_handles; i++) tracecmd_close(handles[i]); free(handles); } -- FILES 
----- [verse] -- *trace-cmd.h* Header file to include in order to have access to the library APIs. *-ltracecmd* Linker switch to add when building a program that uses the library. -- SEE ALSO -------- _libtracefs(3)_, _libtraceevent(3)_, _trace-cmd(1)_ _trace-cmd.dat(5)_ AUTHOR ------ [verse] -- *Steven Rostedt* *Tzvetomir Stoyanov* -- REPORTING BUGS -------------- Report bugs to LICENSE ------- libtracecmd is Free Software licensed under the GNU LGPL 2.1 RESOURCES --------- https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ COPYING ------- Copyright \(C) 2020 VMware, Inc. Free use of this software is granted under the terms of the GNU Public License (GPL). trace-cmd-v3.3.1/Documentation/libtracecmd/libtracecmd.txt000066400000000000000000000133131470231550600236430ustar00rootroot00000000000000libtracecmd(3) ============= NAME ---- libtracecmd - trace-cmd library APIs SYNOPSIS -------- [verse] -- *#include * Open and close trace file: struct tracecmd_input pass:[*]*tracecmd_open*(const char pass:[*]_file_, int _flags_); struct tracecmd_input pass:[*]*tracecmd_open_fd*(int _fd_, int _flags_); struct tracecmd_input pass:[*]*tracecmd_open_head*(const char pass:[*]_file_, int _flags_); void *tracecmd_close*(struct tracecmd_input pass:[*]_handle_); void *tracecmd_set_private*(struct tracecmd_input pass:[*]_handle_, void pass:[*]_data_); void pass:[*]*tracecmd_get_private*(struct tracecmd_input pass:[*]_handle_); Read tracing records from a trace file: int *tracecmd_init_data*(struct tracecmd_input pass:[*]_handle_); struct tep_record pass:[*]*tracecmd_read_cpu_first*(struct tracecmd_input pass:[*]_handle_, int _cpu_); struct tep_record pass:[*]*tracecmd_read_data*(struct tracecmd_input pass:[*]_handle_, int _cpu_); struct tep_record pass:[*]*tracecmd_read_at*(struct tracecmd_input pass:[*]_handle_, unsigned long long _offset_, int pass:[*]_cpu_); void *tracecmd_free_record*(struct tep_record pass:[*]_record_); struct tep_handle 
pass:[*]*tracecmd_get_tep*(struct tracecmd_input pass:[*]_handle_); Iterating over events in a trace file: int *tracecmd_iterate_events*(struct tracecmd_input pass:[*]_handle_, cpu_set_t pass:[*]_cpus_, int _cpu_size_, int (pass:[*]_callback_)(struct tracecmd_input pass:[*], struct tep_record pass:[*], int, void pass:[*]), void pass:[*]_callback_data_); int *tracecmd_iterate_events_multi*(struct tracecmd_input pass:[**]_handles_, int _nr_handles_, int (pass:[*]_callback_)(struct tracecmd_input pass:[*], struct tep_record pass:[*], int, void pass:[*]), void pass:[*]_callback_data_); int *tracecmd_iterate_events_reverse*(struct tracecmd_input pass:[*]_handle_, cpu_set_t pass:[*]_cpus_, int _cpu_size_, int (pass:[*]_callback_)(struct tracecmd_input pass:[*], struct tep_record pass:[*], int, void pass:[*]), void pass:[*]_callback_data_, bool _cont_); int *tracecmd_follow_event*(struct tracecmd_input pass:[*]_handle_, const char pass:[*]_system_, const char pass:[*]_event_name_, int (pass:[*]_callback_)(struct tracecmd_input pass:[*], struct tep_event pass:[*], struct tep_record pass:[*], int, void pass:[*]), void pass:[*]_callback_data_); int *tracecmd_follow_missed_events*(struct tracecmd_input pass:[*]_handle_, int (pass:[*]_callback_)(struct tracecmd_input pass:[*], struct tep_event pass:[*], struct tep_record pass:[*], int, void pass:[*]), void pass:[*]_callback_data_); struct tracecmd_filter pass:[*]*tracecmd_filter_add*(struct tracecmd_input *_handle_, const char pass:[*]_filter_str_, bool _neg_); int *tracecmd_iterate_reset*(struct tracecmd_input pass:[*]_handle_); Read tracing instances from a trace file: int *tracecmd_buffer_instances*(struct tracecmd_input pass:[*]_handle_); const char pass:[*]*tracecmd_buffer_instance_name*(struct tracecmd_input pass:[*]_handle_, int _indx_); struct tracecmd_input pass:[*]*tracecmd_buffer_instance_handle*(struct tracecmd_input pass:[*]_handle_, int _indx_); Handle time stamps from a trace file: unsigned long long 
*tracecmd_get_first_ts*(struct tracecmd_input pass:[*]_handle_); void *tracecmd_add_ts_offset*(struct tracecmd_input pass:[*]_handle_, long long _offset_); int *tracecmd_get_tsc2nsec*(struct tracecmd_input pass:[*]_handle_, int pass:[*]_mult_, int pass:[*]_shift_, unsigned long long pass:[*]_offset_); Get tracing peer information from a trace file: unsigned long long *tracecmd_get_traceid*(struct tracecmd_input pass:[*]_handle_); int *tracecmd_get_guest_cpumap*(struct tracecmd_input pass:[*]_handle_, unsigned long long _trace_id_, const char pass:[*]pass:[*]_name_, int pass:[*]_vcpu_count_, const int pass:[*]pass:[*]_cpu_pid_); Mapping host and guest trace files: int *tracecmd_map_vcpus*(struct tracecmd_input pass:[**]handles, int nr_handles); struct tracecmd_cpu_map pass:[*]*tracecmd_get_cpu_map*(struct tracecmd_input pass:[*]handle, int cpu); struct tracecmd_cpu_map pass:[*]*tracecmd_map_find_by_host_pid*(struct tracecmd_input pass:[*]handle, int host_pid); int *tracecmd_map_get_host_pid*(struct tracecmd_cpu_map pass:[*]map); struct tracecmd_input pass:[*]*tracecmd_map_get_guest*(struct tracecmd_cpu_map pass:[*]map); void *tracecmd_map_set_private*(struct tracecmd_cpu_map pass:[*]map, void pass:[*]priv); void pass:[*]*tracecmd_map_get_private*(struct tracecmd_cpu_map pass:[*]map); Control library logs: int *tracecmd_set_loglevel*(enum tep_loglevel _level_); -- DESCRIPTION ----------- The libtracecmd(3) library provides APIs to read, parse and write _trace-cmd.dat(5)_ files, recorded with _trace-cmd(1)_ application and containing tracing information from ftrace, the official Linux kernel tracer. FILES ----- [verse] -- *trace-cmd.h* Header file to include in order to have access to the library APIs. *-ltracecmd* Linker switch to add when building a program that uses the library. 
-- SEE ALSO -------- *libtraceevent(3)* *libtracefs(3)* *trace-cmd(1)* *trace-cmd.dat(5)* AUTHOR ------ [verse] -- *Steven Rostedt* *Tzvetomir Stoyanov* -- REPORTING BUGS -------------- Report bugs to LICENSE ------- libtracecmd is Free Software licensed under the GNU LGPL 2.1 RESOURCES --------- https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ COPYING ------- Copyright \(C) 2020 VMware, Inc. Free use of this software is granted under the terms of the GNU Public License (GPL). trace-cmd-v3.3.1/Documentation/libtracecmd/meson.build000066400000000000000000000116251470231550600227770ustar00rootroot00000000000000# SPDX-License-Identifier: LGPL-2.1 # # Copyright (c) 2023 Daniel Wagner, SUSE LLC # input text file: man page section sources = { 'libtracecmd-files.txt': '3', 'libtracecmd-instances.txt': '3', 'libtracecmd-iterate.txt': '3', 'libtracecmd-log.txt': '3', 'libtracecmd-maps.txt': '3', 'libtracecmd-peer.txt': '3', 'libtracecmd-record.txt': '3', 'libtracecmd-timestamp.txt': '3', 'libtracecmd.txt': '3', } confdir = meson.current_source_dir() + '/../' top_source_dir = meson.current_source_dir() + '/../../' # # For asciidoc ... # -7.1.2, no extra settings are needed. # 8.0-, set ASCIIDOC8. # # # For docbook-xsl ... # -1.68.1, set ASCIIDOC_NO_ROFF? (based on changelog from 1.73.0) # 1.69.0, no extra settings are needed? # 1.69.1-1.71.0, set DOCBOOK_SUPPRESS_SP? # 1.71.1, no extra settings are needed? # 1.72.0, set DOCBOOK_XSL_172. # 1.73.0-, set ASCIIDOC_NO_ROFF # # # If you had been using DOCBOOK_XSL_172 in an attempt to get rid # of 'the ".ft C" problem' in your generated manpages, and you # instead ended up with weird characters around callouts, try # using ASCIIDOC_NO_ROFF instead (it works fine with ASCIIDOC8). 
# if get_option('asciidoctor') asciidoc = find_program('asciidoctor') asciidoc_extra = ['-a', 'compat-mode'] asciidoc_extra += ['-I.'] asciidoc_extra += ['-r', 'asciidoctor-extensions'] asciidoc_extra += ['-a', 'mansource=libtraceevent'] asciidoc_extra += ['-a', 'manmanual="libtraceevent Manual"'] asciidoc_html = 'xhtml5' else asciidoc = find_program('asciidoc') asciidoc_extra = ['--unsafe'] asciidoc_extra += ['-f', confdir + 'asciidoc.conf'] asciidoc_html = 'xhtml11' r = run_command(asciidoc, '--version', check: true) v = r.stdout().strip() if v.version_compare('>=8.0') asciidoc_extra += ['-a', 'asciidoc7compatible'] endif endif manpage_xsl = confdir + 'manpage-normal.xsl' if get_option('docbook-xls-172') asciidoc_extra += ['-a', 'libtraceevent-asciidoc-no-roff'] manpage_xsl = confdir + 'manpage-1.72.xsl' elif get_option('asciidoc-no-roff') # docbook-xsl after 1.72 needs the regular XSL, but will not # pass-thru raw roff codes from asciidoc.conf, so turn them off. asciidoc_extra += ['-a', 'libtraceevent-asciidoc-no-roff'] endif xmlto = find_program('xmlto') xmlto_extra = [] if get_option('man-bold-literal') xmlto_extra += ['-m ', confdir + 'manpage-bold-literal.xsl'] endif if get_option('docbook-suppress-sp') xmlto_extra += ['-m ', confdir + 'manpage-suppress-sp.xsl'] endif check_doc = custom_target( 'check-doc', output: 'dummy', command : [ top_source_dir + 'check-manpages.sh', meson.current_source_dir()]) gen = generator( asciidoc, output: '@BASENAME@.xml', arguments: [ '-b', 'docbook', '-d', 'manpage', '-a', 'libtraceevent_version=' + meson.project_version(), '-o', '@OUTPUT@'] + asciidoc_extra + ['@INPUT@']) man = [] html = [] foreach txt, section : sources # build man page(s) xml = gen.process(txt) man += custom_target( txt.underscorify() + '_man', input: xml, output: '@BASENAME@.' 
+ section, depends: check_doc, command: [ xmlto, '-m', manpage_xsl, 'man', '-o', '@OUTPUT@'] + xmlto_extra + ['@INPUT@']) # build html pages html += custom_target( txt.underscorify() + '_html', input: txt, output: '@BASENAME@.html', depends: check_doc, command: [ asciidoc, '-b', asciidoc_html, '-d', 'manpage', '-a', 'libtraceevent_version=' + meson.project_version(), '-o', '@OUTPUT@'] + asciidoc_extra + ['@INPUT@']) endforeach # Install path workaround because: # # - xmlto might generate more than one file and we would to tell meson # about those output files. We could figure out which files are generated # (see sed match in check-manpages.sh). # # - The man page generation puts all the generated files under sub dirs # and it's not obvious how to tell Meson it should not do this without # causing the install step to fail (confusion where the generated files # are stored) # # - The documentation build is not part of the 'build' target. The user # has explicitly to trigger the doc build. Hence the documentation is # not added to the 'install' target. # # Thus just use a plain old shell script to move the generated files to the # right location. 
conf = configuration_data() conf.set('SRCDIR', meson.current_build_dir()) conf.set('MANDIR', mandir) conf.set('HTMLDIR', htmldir) configure_file( input: 'install-docs.sh.in', output: 'install-docs.sh', configuration: conf) meson.add_install_script( join_paths(meson.current_build_dir(), 'install-docs.sh')) trace-cmd-v3.3.1/Documentation/manpage-1.72.xsl000066400000000000000000000007761470231550600211160ustar00rootroot00000000000000 trace-cmd-v3.3.1/Documentation/manpage-base.xsl000066400000000000000000000022501470231550600214260ustar00rootroot00000000000000 sp br trace-cmd-v3.3.1/Documentation/manpage-bold-literal.xsl000066400000000000000000000011021470231550600230610ustar00rootroot00000000000000 fB fR trace-cmd-v3.3.1/Documentation/manpage-normal.xsl000066400000000000000000000007331470231550600220100ustar00rootroot00000000000000 \ . trace-cmd-v3.3.1/Documentation/manpage-suppress-sp.xsl000066400000000000000000000013411470231550600230200ustar00rootroot00000000000000 trace-cmd-v3.3.1/Documentation/trace-cmd/000077500000000000000000000000001470231550600202165ustar00rootroot00000000000000trace-cmd-v3.3.1/Documentation/trace-cmd/Makefile000066400000000000000000000066311470231550600216640ustar00rootroot00000000000000# SPDX-License-Identifier: GPL-2.0 # Include the utils include $(src)/scripts/utils.mk # This Makefile and manpage XSL files were taken from libtracefs # and modified for libtracecmd MAN1_TXT= \ $(wildcard trace-cmd*.1.txt) MAN5_TXT= \ $(wildcard trace-cmd*.5.txt) MAN_TXT = $(MAN1_TXT) $(MAN5_TXT) _MAN_XML=$(patsubst %.txt,%.xml,$(MAN_TXT)) _MAN_HTML=$(patsubst %.txt,%.html,$(MAN_TXT)) _DOC_MAN1=$(patsubst %.1.txt,%.1,$(MAN1_TXT)) _DOC_MAN5=$(patsubst %.5.txt,%.5,$(MAN5_TXT)) MAN_XML=$(addprefix $(OUTPUT),$(_MAN_XML)) MAN_HTML=$(addprefix $(OUTPUT),$(_MAN_HTML)) DOC_MAN1=$(addprefix $(OUTPUT),$(_DOC_MAN1)) DOC_MAN5=$(addprefix $(OUTPUT),$(_DOC_MAN5)) # Make the path relative to DESTDIR, not prefix ifndef DESTDIR prefix?=$(HOME) endif bindir?=$(prefix)/bin 
htmldir?=$(prefix)/share/doc/trace-cmd pdfdir?=$(prefix)/share/doc/trace-cmd mandir?=$(prefix)/share/man man1dir=$(mandir)/man1 man5dir=$(mandir)/man5 ifdef USE_ASCIIDOCTOR ASCIIDOC_EXTRA += -a mansource="libtracecmd" -a manmanual="libtracecmd Manual" endif all: check-man-tools html man man: man1 man5 man1: $(DOC_MAN1) man5: $(DOC_MAN5) html: $(MAN_HTML) $(MAN_HTML) $(DOC_MAN1) $(DOC_MAN5): $(ASCIIDOC_CONF) install: check-man-tools install-man install-html check-man-tools: ifdef missing_tools $(error "You need to install $(missing_tools) for man pages") endif install-%.1: $(OUTPUT)%.1 $(Q)$(call do_install_docs,$<,$(man1dir),644); install-%.5: $(OUTPUT)%.5 $(Q)$(call do_install_docs,$<,$(man5dir),644); do-install-man: man $(addprefix install-,$(wildcard $(OUTPUT)*.1)) \ $(addprefix install-,$(wildcard $(OUTPUT)*.5)) install-man: man $(Q)$(MAKE) -C . do-install-man install-%.txt: $(OUTPUT)%.html $(Q)$(call do_install_docs,$<,$(htmldir),644); do-install-html: html $(addprefix install-,$(wildcard *.txt)) install-html: html do-install-html uninstall: uninstall-man uninstall-html uninstall-man: $(Q)$(RM) $(addprefix $(DESTDIR)$(man1dir)/,$(DOC_MAN1)) $(Q)$(RM) $(addprefix $(DESTDIR)$(man5dir)/,$(DOC_MAN5)) uninstall-html: $(Q)$(RM) $(addprefix $(DESTDIR)$(htmldir)/,$(MAN_HTML)) ifdef missing_tools DO_INSTALL_MAN = $(warning Please install $(missing_tools) to have the man pages installed) else DO_INSTALL_MAN = do-install-man endif CLEAN_FILES = \ $(MAN_XML) $(addsuffix +,$(MAN_XML)) \ $(MAN_HTML) $(addsuffix +,$(MAN_HTML)) \ $(DOC_MAN1) $(DOC_MAN5) *.1 *.5 clean: $(Q) $(RM) $(CLEAN_FILES) ifdef USE_ASCIIDOCTOR $(OUTPUT)%.1 : $(OUTPUT)%.1.txt $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ $(ASCIIDOC) -b manpage -d manpage \ $(ASCIIDOC_EXTRA) -atracecmd_version=$(TRACECMD_VERSION) -o $@+ $< && \ mv $@+ $@ $(OUTPUT)%.5 : $(OUTPUT)%.5.txt $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ $(ASCIIDOC) -b manpage -d manpage \ $(ASCIIDOC_EXTRA) -atracecmd_version=$(TRACECMD_VERSION) -o $@+ $< && \ mv $@+ 
$@ endif $(OUTPUT)%.1 : $(OUTPUT)%.1.xml $(QUIET_XMLTO)$(RM) $@ && \ $(XMLTO) -o $(OUTPUT). -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $<; \ $(OUTPUT)%.5 : $(OUTPUT)%.5.xml $(QUIET_XMLTO)$(RM) $@ && \ $(XMLTO) -o $(OUTPUT). -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $<; \ $(OUTPUT)%.xml : %.txt $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ $(ASCIIDOC) -b docbook -d manpage \ $(ASCIIDOC_EXTRA) -atracecmd_version=$(TRACECMD_VERSION) -o $@+ $< && \ mv $@+ $@ $(MAN_HTML): $(OUTPUT)%.html : %.txt $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \ $(ASCIIDOC) -b $(ASCIIDOC_HTML) -d manpage \ $(ASCIIDOC_EXTRA) -atracecmd_version=$(TRACECMD_VERSION) -o $@+ $< && \ mv $@+ $@ trace-cmd-v3.3.1/Documentation/trace-cmd/install-docs.sh.in000077500000000000000000000013501470231550600235550ustar00rootroot00000000000000#!/bin/bash # SPDX-License-Identifier: LGPL-2.1 # # Copyright (c) 2023 Daniel Wagner, SUSE LLC for section in 1 3 5; do while IFS= read -r -d '' man; do [ ! -d "${DESTDIR}@MANDIR@/man${section}" ] && install -d "${DESTDIR}@MANDIR@/man${section}" echo Installing "${man}" to "${DESTDIR}@MANDIR@/man${section}" install -m 0644 "${man}" "${DESTDIR}@MANDIR@/man${section}/" done< <(find "@SRCDIR@" -name "*\.${section}" -type f -print0) done while IFS= read -r -d '' html; do [ ! 
-d "${DESTDIR}@HTMLDIR@" ] && install -d "${DESTDIR}@HTMLDIR@" echo Installing "${html}" to "${DESTDIR}@HTMLDIR@" install -m 0644 "${html}" "${DESTDIR}@HTMLDIR@" done< <(find "@SRCDIR@" -name "*\.html" -type f -print0) trace-cmd-v3.3.1/Documentation/trace-cmd/meson.build000066400000000000000000000125071470231550600223650ustar00rootroot00000000000000# SPDX-License-Identifier: LGPL-2.1 # # Copyright (c) 2023 Daniel Wagner, SUSE LLC # input text file: man page section sources = { 'trace-cmd.1.txt': '1', 'trace-cmd-agent.1.txt': '1', 'trace-cmd-check-events.1.txt': '1', 'trace-cmd-clear.1.txt': '1', 'trace-cmd-convert.1.txt': '1', 'trace-cmd.dat.v6.5.txt': '5', 'trace-cmd.dat.v7.5.txt': '5', 'trace-cmd-dump.1.txt': '1', 'trace-cmd-extract.1.txt': '1', 'trace-cmd-hist.1.txt': '1', 'trace-cmd-list.1.txt': '1', 'trace-cmd-listen.1.txt': '1', 'trace-cmd-mem.1.txt': '1', 'trace-cmd-options.1.txt': '1', 'trace-cmd-profile.1.txt': '1', 'trace-cmd-record.1.txt': '1', 'trace-cmd-report.1.txt': '1', 'trace-cmd-reset.1.txt': '1', 'trace-cmd-restore.1.txt': '1', 'trace-cmd-set.1.txt': '1', 'trace-cmd-show.1.txt': '1', 'trace-cmd-snapshot.1.txt': '1', 'trace-cmd-split.1.txt': '1', 'trace-cmd-stack.1.txt': '1', 'trace-cmd-start.1.txt': '1', 'trace-cmd-stat.1.txt': '1', 'trace-cmd-stop.1.txt': '1', 'trace-cmd-stream.1.txt': '1', } confdir = meson.current_source_dir() + '/../' top_source_dir = meson.current_source_dir() + '/../../' # # For asciidoc ... # -7.1.2, no extra settings are needed. # 8.0-, set ASCIIDOC8. # # # For docbook-xsl ... # -1.68.1, set ASCIIDOC_NO_ROFF? (based on changelog from 1.73.0) # 1.69.0, no extra settings are needed? # 1.69.1-1.71.0, set DOCBOOK_SUPPRESS_SP? # 1.71.1, no extra settings are needed? # 1.72.0, set DOCBOOK_XSL_172. 
# 1.73.0-, set ASCIIDOC_NO_ROFF # # # If you had been using DOCBOOK_XSL_172 in an attempt to get rid # of 'the ".ft C" problem' in your generated manpages, and you # instead ended up with weird characters around callouts, try # using ASCIIDOC_NO_ROFF instead (it works fine with ASCIIDOC8). # if get_option('asciidoctor') asciidoc = find_program('asciidoctor') asciidoc_extra = ['-a', 'compat-mode'] asciidoc_extra += ['-I.'] asciidoc_extra += ['-r', 'asciidoctor-extensions'] asciidoc_extra += ['-a', 'mansource=libtraceevent'] asciidoc_extra += ['-a', 'manmanual="libtraceevent Manual"'] asciidoc_html = 'xhtml5' else asciidoc = find_program('asciidoc') asciidoc_extra = ['--unsafe'] asciidoc_extra += ['-f', confdir + 'asciidoc.conf'] asciidoc_html = 'xhtml11' r = run_command(asciidoc, '--version', check: true) v = r.stdout().strip() if v.version_compare('>=8.0') asciidoc_extra += ['-a', 'asciidoc7compatible'] endif endif manpage_xsl = confdir + 'manpage-normal.xsl' if get_option('docbook-xls-172') asciidoc_extra += ['-a', 'libtraceevent-asciidoc-no-roff'] manpage_xsl = confdir + 'manpage-1.72.xsl' elif get_option('asciidoc-no-roff') # docbook-xsl after 1.72 needs the regular XSL, but will not # pass-thru raw roff codes from asciidoc.conf, so turn them off. asciidoc_extra += ['-a', 'libtraceevent-asciidoc-no-roff'] endif xmlto = find_program('xmlto') xmlto_extra = [] if get_option('man-bold-literal') xmlto_extra += ['-m ', confdir + 'manpage-bold-literal.xsl'] endif if get_option('docbook-suppress-sp') xmlto_extra += ['-m ', confdir + 'manpage-suppress-sp.xsl'] endif gen = generator( asciidoc, output: '@BASENAME@.xml', arguments: [ '-b', 'docbook', '-d', 'manpage', '-a', 'libtraceevent_version=' + meson.project_version(), '-o', '@OUTPUT@'] + asciidoc_extra + ['@INPUT@']) man = [] html = [] foreach txt, section : sources # build man page(s) xml = gen.process(txt) man += custom_target( txt.underscorify() + '_man', input: xml, output: '@BASENAME@.' 
+ section, command: [ xmlto, '-m', manpage_xsl, 'man', '-o', '@OUTPUT@'] + xmlto_extra + ['@INPUT@']) # build html pages html += custom_target( txt.underscorify() + '_html', input: txt, output: '@BASENAME@.html', command: [ asciidoc, '-b', asciidoc_html, '-d', 'manpage', '-a', 'libtraceevent_version=' + meson.project_version(), '-o', '@OUTPUT@'] + asciidoc_extra + ['@INPUT@']) endforeach # Install path workaround because: # # - xmlto might generate more than one file and we would to tell meson # about those output files. We could figure out which files are generated # (see sed match in check-manpages.sh). # # - The man page generation puts all the generated files under sub dirs # and it's not obvious how to tell Meson it should not do this without # causing the install step to fail (confusion where the generated files # are stored) # # - The documentation build is not part of the 'build' target. The user # has explicitly to trigger the doc build. Hence the documentation is # not added to the 'install' target. # # Thus just use a plain old shell script to move the generated files to the # right location. conf = configuration_data() conf.set('SRCDIR', meson.current_build_dir()) conf.set('MANDIR', mandir) conf.set('HTMLDIR', htmldir) configure_file( input: 'install-docs.sh.in', output: 'install-docs.sh', configuration: conf) meson.add_install_script( join_paths(meson.current_build_dir(), 'install-docs.sh')) trace-cmd-v3.3.1/Documentation/trace-cmd/trace-cmd-agent.1.txt000066400000000000000000000041651470231550600240570ustar00rootroot00000000000000TRACE-CMD-AGENT(1) ================== NAME ---- trace-cmd-agent - Run as an agent on a machine (to be controlled by another machine) SYNOPSIS -------- *trace-cmd agent* ['OPTIONS'] DESCRIPTION ----------- The trace-cmd(1) agent listens over a vsocket (for virtual machines) or a TCP port for connections to control the tracing of the machine. 
The agent will then start tracing on the local machine and pass the data to the controlling connection. OPTIONS ------- *-N* 'client':: Listen over TCP instead of a vsocket. Must pass in a client host name or IP address to allow connection to. It will only connect to the specified client. Note, any process on that client can control the agent. *This is a very insecure setting. Only use on a trusted network* *Only use if the client machine is totally trusted* *-p* 'port':: This option will specify the port to listen to. *-D*:: This option causes trace-cmd agent to go into a daemon mode and run in the background. *-P* 'cid':: Allow an agent to also act as a proxy server, where it can be run on a host and connect with a guest. 'cid' is the context ID (see *vsock*(7)) of the client (e.g., guest VM) it will allow to connect. *--verbose*[='level']:: Set the log level. Supported log levels are "none", "critical", "error", "warning", "info", "debug", "all" or their identifiers "0", "1", "2", "3", "4", "5", "6". Setting the log level to a specific value enables all logs from that and all previous levels. The level will default to "info" if one is not specified. Example: enable all critical, error and warning logs trace-cmd agent --verbose=warning SEE ALSO -------- trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1), trace-cmd-stop(1), trace-cmd-extract(1), trace-cmd-reset(1), trace-cmd-split(1), trace-cmd-list(1) AUTHOR ------ Written by Steven Rostedt, RESOURCES --------- https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ COPYING ------- Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under the terms of the GNU Public License (GPL). 
trace-cmd-v3.3.1/Documentation/trace-cmd/trace-cmd-attach.1.txt000066400000000000000000000103521470231550600242200ustar00rootroot00000000000000TRACE-CMD-ATTACH(1) =================== NAME ---- trace-cmd-attach - attach a guest trace.dat file to a host trace.dat file SYNOPSIS -------- *trace-cmd attach* ['OPTIONS'] host-trace-file guest-trace-file guest-pid [guest-pid ...] DESCRIPTION ----------- The trace-cmd(1) attach is used to take a trace.dat file created on a guest and attach it to a trace.dat file that was created on the host. In most cases, trace-cmd-agent(1) can be used to automate this, but if for some reason, the agent isn't appropriate, it may be required to start trace-cmd recording on the guest with trace-cmd-record(1). If the host recording is activated at the same time, one can use trace-cmd attach(1) to connect the guest and host files as if they were created by the trace-cmd agent. *host-trace-file*:: The trace.dat file created by the host. Must have kvm_exit and kvm_entry events, and use the "tsc2nsec" clock. *guest-trace-file*:: The trace.dat file created by the guest. Must use the "x86-tsc" clock. For now, this is only supported on x86, it may support other architectures later. *guest-pid*:: The process ID of the host thread that represents the guest's threads. Each process ID that represents all of the guest vCPUs should be listed. Note, you can add more than just the threads that represent the guest vCPUs, as the tool will search the *host-trace-file* for kvm_exit and kvm_entry events to match these PIDs with the vCPUs that they represent. OPTIONS ------- *-c* 'cpus':: Specify the number of CPUs the guest has. *-s* 'timeshift':: A comma separated list of the format _offset_,_scale_,_frac_,_timestamp_ These values map to what are given in /sys/kernel/debug/kvm/<guest>/vcpu<N>/* *offset*: Is the offset of the guest. "tsc-offset" in the directory. Note that the value listed here is the negative of what is listed in the directory. *scale*: The scaling factor. 
"tsc-scaling-ratio" *frac*: The fraction bits. "tsc-scaling-ratio-frac-bits" *timestamp*: The timestamp to start using the above. In some cases, the values may change over time. By adding a timestamp, it will take effect after the timestamp has been hit. Normally this would be zero. Currently, only one timeshift is given per CPU. One *-s* option should be given for each CPU. If there are fewer options than CPUs, then the last option given will be used for the rest of the CPUs. If only one option is given, then the values for that option will be used for all CPUs. EXAMPLES -------- Attach a guest trace file to a host trace file: [source,shell] ---- $ # find the process for a given guest $ ps aux |grep qemu libvirt+ 63170 5.6 1.6 13994848 4257540 ? Sl May02 2884:49 /usr/bin/qemu-system-x86_64... $ # Use 63170 to find all the PIDs for the given guest $ ls /proc/63170/task 1541591 63170 63198 63209 63211 63213 63214 63215 63216 63217 63218 63219 63234 $ # Find the tsc offset $ su # cat /sys/kernel/debug/kvm/63170-15/vcpu0/tsc-offset -27950965013436847 # trace-cmd record -C tsc2nsec -e kvm -e sched -e irq -e timer # on guest: # trace-cmd record -C x86-tsc -e sched -e irq -e timer sleep 10 # back on host, hit Ctrl^C to stop tracing after the guest is done # # Make the trace.dat user owned by your user account. # chown user.user trace.dat # exit $ scp root@guest:trace.dat trace-guest.dat $ # now attach the two files (guest has 8 CPUs) $ trace-cmd attach -c 8 -s 27950965013436847 trace.dat trace-guest.dat 1541591 63170 63198 63209 63211 63213 63214 63215 63216 63217 63218 63219 63234 $ trace-cmd report -i trace.dat -i trace-guest.dat $ # now you should see the guest trace interleaved within the host trace. 
---- SEE ALSO -------- trace-cmd(1), trace-cmd-report(1), trace-cmd-start(1), trace-cmd-stop(1), trace-cmd-extract(1), trace-cmd-reset(1), trace-cmd-split(1), trace-cmd-list(1), trace-cmd-listen(1), trace-cmd-profile(1) AUTHOR ------ Written by Steven Rostedt (Google) RESOURCES --------- https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ COPYING ------- Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under the terms of the GNU Public License (GPL). trace-cmd-v3.3.1/Documentation/trace-cmd/trace-cmd-check-events.1.txt000066400000000000000000000033141470231550600253330ustar00rootroot00000000000000TRACE-CMD-CHECK_EVENTS(1) ========================= NAME ---- trace-cmd-check-events - parse the event formats on local system SYNOPSIS -------- *trace-cmd check-events* ['OPTIONS'] DESCRIPTION ----------- The trace-cmd(1) check-events parses format strings for all the events on the local system. It returns whether all the format strings can be parsed correctly. It will load plugins unless specified otherwise. This is useful to check for any trace event format strings which may contain some internal kernel function references which cannot be decoded outside of the kernel. This may mean that either the unparsed format strings of the trace events need to be changed or that a plugin needs to be created to parse them. OPTIONS ------- *-N* - Don't load plugins *--verbose*[='level']:: Set the log level. Supported log levels are "none", "critical", "error", "warning", "info", "debug", "all" or their identifiers "0", "1", "2", "3", "4", "5", "6". Setting the log level to specific value enables all logs from that and all previous levels. The level will default to "info" if one is not specified. 
Example: enable all critical, error and warning logs trace-cmd check-events --verbose=warning SEE ALSO -------- trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-stop(1), trace-cmd-extract(1), trace-cmd-reset(1), trace-cmd-split(1), trace-cmd-list(1), trace-cmd-listen(1), trace-cmd-start(1) AUTHOR ------ Written by Vaibhav Nagarnaik, RESOURCES --------- https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ COPYING ------- Copyright \(C) 2011 Google, Inc. Free use of this software is granted under the terms of the GNU Public License (GPL). trace-cmd-v3.3.1/Documentation/trace-cmd/trace-cmd-clear.1.txt000066400000000000000000000023171470231550600240440ustar00rootroot00000000000000TRACE-CMD-CLEAR(1) ================= NAME ---- trace-cmd-clear - clear the Ftrace buffer. SYNOPSIS -------- *trace-cmd clear* ['OPTIONS'] DESCRIPTION ----------- The *trace-cmd(1) clear* clears the content of the Ftrace ring buffer. OPTIONS ------- *-B* 'buffer-name':: If the kernel supports multiple buffers, this will clear only the given buffer. It does not affect any other buffers. This may be used multiple times to specify different buffers. The top level buffer will not be cleared if this option is given. *-a*:: Clear all existing buffers, including the top level one. SEE ALSO -------- trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1), trace-cmd-extract(1), trace-cmd-reset(1), trace-cmd-split(1), trace-cmd-list(1), trace-cmd-listen(1) AUTHOR ------ [verse] -- *Steven Rostedt* , author of *trace-cmd*. *Tzvetomir Stoyanov* , author of this man page. -- REPORTING BUGS -------------- Report bugs to LICENSE ------- trace-cmd is Free Software licensed under the terms of the GNU Public License (GPL). 
RESOURCES --------- https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/trace-cmd-v3.3.1/Documentation/trace-cmd/trace-cmd-convert.1.txt000066400000000000000000000033471470231550600244420ustar00rootroot00000000000000TRACE-CMD-CONVERT(1) =================== NAME ---- trace-cmd-convert - convert trace files SYNOPSIS -------- *trace-cmd convert* ['OPTIONS'] ['output-file'] DESCRIPTION ----------- The trace-cmd(1) convert command converts a trace file. It reads the input file and copies the data into an output file. The output file may be in a different format, depending on the command line arguments. The default output is in version 7 and compressed (if compiled with compression support). OPTIONS ------- *-i* 'input-file':: By default, trace-cmd convert will read the file 'trace.dat'. But the *-i* option opens up the given 'input-file' instead. *-o* 'out-file':: The name of the output file, this parameter is mandatory. Note, the output file may also be specified as the last item on the command line. *--file-version*:: Desired version of the output file. Supported versions are 6 or 7. *--compression*:: Compression of the trace output file, one of these strings can be passed: 'any' - auto select the best available compression algorithm 'none' - do not compress the trace file 'name' - the name of the desired compression algorithm. Available algorithms can be listed with trace-cmd list -c *--help*:: Print usage information. EXAMPLES -------- # trace-cmd convert --compression any trace_compress.dat SEE ALSO -------- trace-cmd(1), trace-cmd.dat(5) AUTHOR ------ *Steven Rostedt* , author of *trace-cmd*. *Tzvetomir Stoyanov* , author of this man page. RESOURCES --------- https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ COPYING ------- Copyright \(C) 2021 VMware. Free use of this software is granted under the terms of the GNU Public License (GPL). 
trace-cmd-v3.3.1/Documentation/trace-cmd/trace-cmd-dump.1.txt000066400000000000000000000102151470231550600237170ustar00rootroot00000000000000TRACE-CMD-DUMP(1) =================== NAME ---- trace-cmd-dump - show a meta data from a trace file, created by trace-cmd record SYNOPSIS -------- *trace-cmd dump* ['OPTIONS'] ['input-file'] DESCRIPTION ----------- The trace-cmd(1) dump command will display the meta data from a trace file created by trace-cmd record. OPTIONS ------- *-i* 'input-file':: By default, trace-cmd dump will read the file 'trace.dat'. But the *-i* option open up the given 'input-file' instead. Note, the input file may also be specified as the last item on the command line. *-v*, *--validate*:: Check if the input file is a valid trace file, created by trace-cmd. *--summary*:: Print a meta data summary - initial format and a short description of each file section. This is the default action, if no arguments are specified. *--head-page*:: Print the header page information, stored in the file. *--head-event*:: Print the event header information, stored in the file. *--ftrace-events*:: Print formats of ftrace specific events. *--systems*:: Print information of event systems, stored in the file - name and number of events for each system. *--events*:: Print formats of all events, stored in the file. *--kallsyms*:: Print information of the mapping of function addresses to the function names. *--printk*:: Print trace_printk() format strings, stored in the file. *--cmd-lines*:: Print mapping a PID to a process name. *--options*:: Print all options, stored in the file. *--flyrecord*:: Print the offset and the size of tracing data per each CPU. *--clock*:: Print the trace clock, used for timestamp of the tracing events, stored in the file. *--all*:: Print all meta data from the file. *--help*:: Print usage information. *--verbose*[='level']:: Set the log level. 
Supported log levels are "none", "critical", "error", "warning", "info", "debug", "all" or their identifiers "0", "1", "2", "3", "4", "5", "6". Setting the log level to a specific value enables all logs from that and all previous levels. The level will default to "info" if one is not specified. Example: enable all critical, error and warning logs trace-cmd dump --verbose=warning EXAMPLES -------- [source,shell] ---- # trace-cmd dump --summary -i trace.dat Tracing meta data in file trace.dat: [Initial format] 6 [Version] 0 [Little endian] 8 [Bytes in a long] 4096 [Page size, bytes] [Header info, 205 bytes] [Header event, 205 bytes] [Ftrace format, 15 events] [Events format, 2 systems] [Kallsyms, 7144493 bytes] [Trace printk, 2131 bytes] [Saved command lines, 117 bytes] 8 [CPUs with tracing data] [12 options] [Flyrecord tracing data] ---- [source,shell] ---- # trace-cmd dump --flyrecord -i trace.dat [Flyrecord tracing data] 7176192 0 [offset, size of cpu 0] 7176192 0 [offset, size of cpu 1] 7176192 0 [offset, size of cpu 2] 7176192 4096 [offset, size of cpu 3] 7180288 4096 [offset, size of cpu 4] 7184384 0 [offset, size of cpu 5] 7184384 0 [offset, size of cpu 6] 7184384 0 [offset, size of cpu 7] ---- [source,shell] ---- # trace-cmd dump --summary --systems -i trace.dat Tracing meta data in file trace.dat: [Initial format] 6 [Version] 0 [Little endian] 8 [Bytes in a long] 4096 [Page size, bytes] [Header info, 205 bytes] [Header event, 205 bytes] [Ftrace format, 15 events] [Events format, 3 systems] sched 23 [system, events] irq 5 [system, events] kvm 70 [system, events] [Kallsyms, 7144493 bytes] [Trace printk, 2131 bytes] [Saved command lines, 157 bytes] 8 [CPUs with tracing data] [11 options] [Flyrecord tracing data] ---- [source,shell] ---- # trace-cmd dump --validate -i trace.dat File trace.dat is a valid trace-cmd file ---- SEE ALSO -------- trace-cmd(1), trace-cmd.dat(5) AUTHOR ------ *Steven Rostedt* , author of *trace-cmd*. 
*Tzvetomir Stoyanov* , author of this man page. RESOURCES --------- https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ COPYING ------- Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under the terms of the GNU Public License (GPL). trace-cmd-v3.3.1/Documentation/trace-cmd/trace-cmd-extract.1.txt000066400000000000000000000071051470231550600244300ustar00rootroot00000000000000TRACE-CMD-EXTRACT(1) ==================== NAME ---- trace-cmd-extract - extract out the data from the Ftrace Linux tracer. SYNOPSIS -------- *trace-cmd extract ['OPTIONS']* DESCRIPTION ----------- The trace-cmd(1) extract is usually used after 'trace-cmd-start(1)' and 'trace-cmd-stop(1)'. It can be used after the Ftrace tracer has been started manually through the Ftrace pseudo file system. The extract command creates a trace.dat file that can be used by 'trace-cmd-report(1)' to read from. It reads the kernel internal ring buffer to produce the trace.dat file. OPTIONS ------- *-p* 'plugin':: Although *extract* does not start any traces, some of the plugins require just reading the output in ASCII format. These are the latency tracers, since the latency tracers have a separate internal buffer. The plugin option is therefore only necessary for the 'wakeup', 'wakeup-rt', 'irqsoff', 'preemptoff' and 'preemptirqsoff' plugins. Without this option, the extract command will extract from the internal Ftrace buffers. *-O* 'option':: If a latency tracer is being extracted, and the *-p* option is used, then there are some Ftrace options that can change the format. This will update those options before extracting. To see the list of options see 'trace-cmd-list'. To enable an option, write its name, to disable the option prepend the characters 'no' to it. For example: 'noprint-parent' will disable the 'print-parent' option that prints the parent function in printing a function event. *-o* 'outputfile':: By default, the extract command will create a 'trace.dat' file.
This option will change where the file is written to. *-s*:: Extract from the snapshot buffer (if the kernel supports it). *--date*:: This is the same as the trace-cmd-record(1) --date option, but it does cause the extract routine to disable all tracing. That is, the end of the extract will perform something similar to trace-cmd-reset(1). *-B* 'buffer-name':: If the kernel supports multiple buffers, this will extract the trace for only the given buffer. It does not affect any other buffer. This may be used multiple times to specify different buffers. When this option is used, the top level instance will not be extracted unless *-t* is given. *-a*:: Extract all existing buffer instances. When this option is used, the top level instance will not be extracted unless *-t* is given. *-t*:: Extracts the top level instance buffer. Without the *-B* or *-a* option this is the same as the default. But if *-B* or *-a* is used, this is required if the top level instance buffer should also be extracted. *--verbose*[='level']:: Set the log level. Supported log levels are "none", "critical", "error", "warning", "info", "debug", "all" or their identifiers "0", "1", "2", "3", "4", "5", "6". Setting the log level to specific value enables all logs from that and all previous levels. The level will default to "info" if one is not specified. Example: enable all critical, error and warning logs trace-cmd extract --verbose=warning SEE ALSO -------- trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1), trace-cmd-stop(1), trace-cmd-reset(1), trace-cmd-split(1), trace-cmd-list(1), trace-cmd-listen(1) AUTHOR ------ Written by Steven Rostedt, RESOURCES --------- https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ COPYING ------- Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under the terms of the GNU Public License (GPL). 
trace-cmd-v3.3.1/Documentation/trace-cmd/trace-cmd-hist.1.txt000066400000000000000000000027231470231550600237260ustar00rootroot00000000000000TRACE-CMD-HIST(1) ================= NAME ---- trace-cmd-hist - show histogram of events in trace.dat file SYNOPSIS -------- *trace-cmd hist* ['OPTIONS']['input-file'] DESCRIPTION ----------- The trace-cmd(1) hist displays a histogram from the trace.dat file. Instead of showing the events as they were ordered, it creates a histogram that can be displayed per task or for all tasks where the most common events appear first. It uses the function tracer and call stacks that it finds to try to put together a call graph of the events. OPTIONS ------- *-i* 'input-file':: By default, trace-cmd hist will read the file 'trace.dat'. But the *-i* option opens up the given 'input-file' instead. Note, the input file may also be specified as the last item on the command line. *-P*:: To compact all events and show the call graphs by ignoring tasks and different PIDs, add the *-P* to do so. Instead of showing the task name, it will group all chains together and show "<all pids>". SEE ALSO -------- trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1), trace-cmd-stop(1), trace-cmd-extract(1), trace-cmd-reset(1), trace-cmd-split(1), trace-cmd-listen(1) AUTHOR ------ Written by Steven Rostedt, RESOURCES --------- https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ COPYING ------- Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under the terms of the GNU Public License (GPL). trace-cmd-v3.3.1/Documentation/trace-cmd/trace-cmd-list.1.txt000066400000000000000000000046061470231550600237340ustar00rootroot00000000000000TRACE-CMD-LIST(1) ================= NAME ---- trace-cmd-list - list available plugins, events or options for Ftrace.
SYNOPSIS -------- *trace-cmd list* ['OPTIONS'] DESCRIPTION ----------- The trace-cmd(1) list displays the available plugins, events or Ftrace options that are configured on the current machine. If no option is given, then it lists all plugins, event systems, events and Ftrace options to standard output. OPTIONS ------- *-e* ['regex']:: This option will list the available events that are enabled on the local system. It takes an optional argument that uses 'regcomp(3)' expressions to search. trace-cmd list -e '^sys.*' *-F*:: Used with *-e* 'regex' to show the fields of the event. *--full*:: Used with *-F* which will show the "print fmt" of the event along with the fields. *-l*:: Used with *-e* 'regex' to show those events' filters. *-R*:: Used with *-e* 'regex' to show those events' triggers. *-s*:: This option will list the available event systems. *-t*:: This option will list the available tracers that are enabled on the local system. *-p*:: Same as *-t* and only for legacy purposes. *-o*:: This option will list the available Ftrace options that are configured on the local system. *-f* ['regex']:: This option will list the available filter functions. These are the list of functions on the system that you can trace, or filter on. It takes an optional argument that uses 'regcomp(3)' expressions to search. trace-cmd list -f '^sched.*' *-P*:: List the plugin files that get loaded on trace-cmd report. *-O*:: List plugin options that can be used by trace-cmd report *-O* option. *-B*:: List defined buffer instances (sub buffers). *-C*:: List defined clocks that can be used with trace-cmd record -C. The one in brackets ([]) is the active clock. *-c*:: List the available trace file compression algorithms.
SEE ALSO -------- trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1), trace-cmd-stop(1), trace-cmd-extract(1), trace-cmd-reset(1), trace-cmd-split(1), trace-cmd-listen(1) AUTHOR ------ Written by Steven Rostedt, RESOURCES --------- https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ COPYING ------- Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under the terms of the GNU Public License (GPL). trace-cmd-v3.3.1/Documentation/trace-cmd/trace-cmd-listen.1.txt000066400000000000000000000041141470231550600242510ustar00rootroot00000000000000TRACE-CMD-LISTEN(1) =================== NAME ---- trace-cmd-listen - listen for incoming connection to record tracing. SYNOPSIS -------- *trace-cmd listen* -p 'port' ['OPTIONS'] DESCRIPTION ----------- The trace-cmd(1) listen sets up a port to listen on, waiting for connections from other hosts that run 'trace-cmd-record(1)' with the *-N* option. When a connection is made, and the remote host sends data, it will create a file called 'trace.HOST:PORT.dat'. Where HOST is the name of the remote host, and PORT is the port that the remote host used to connect with. OPTIONS ------- *-p* 'port':: This option will specify the port to listen to. *-D*:: This option causes trace-cmd listen to go into a daemon mode and run in the background. *-V*:: Listen on a vsocket instead. This is useful for tracing between host and guest VMs. *-d* 'dir':: This option specifies a directory to write the data files into. *-o* 'filename':: This option overrides the default 'trace' in the 'trace.HOST:PORT.dat' that is created when a remote host connects. *-l* 'filename':: This option writes the output messages to a log file instead of standard output. *--verbose*[='level']:: Set the log level. Supported log levels are "none", "critical", "error", "warning", "info", "debug", "all" or their identifiers "0", "1", "2", "3", "4", "5", "6".
Setting the log level to a specific value enables all logs from that and all previous levels. The level will default to "info" if one is not specified. Example: enable all critical, error and warning logs trace-cmd listen --verbose=warning SEE ALSO -------- trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1), trace-cmd-stop(1), trace-cmd-extract(1), trace-cmd-reset(1), trace-cmd-split(1), trace-cmd-list(1) AUTHOR ------ Written by Steven Rostedt, RESOURCES --------- https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ COPYING ------- Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under the terms of the GNU Public License (GPL). trace-cmd-v3.3.1/Documentation/trace-cmd/trace-cmd-mem.1.txt000066400000000000000000000071171470231550600235370ustar00rootroot00000000000000TRACE-CMD-MEM(1) ================ NAME ---- trace-cmd-mem - show memory usage of certain kmem events SYNOPSIS -------- *trace-cmd mem* ['OPTIONS']['input-file'] DESCRIPTION ----------- The trace-cmd(1) mem requires a trace-cmd record that enabled the following events: kmalloc kmalloc_node kfree kmem_cache_alloc kmem_cache_alloc_node kmem_cache_free It then reads the amount requested and the amount freed as well as the functions that called the allocation. It then reports the final amount of bytes requested and allocated, along with the total amount allocated and requested, as well as the max allocation and requested during the run. It reports the amount of wasted bytes (allocated - requested) that was not freed, as well as the max wasted amount during the run. The list is sorted by descending order of wasted bytes after the run.
Function Waste Alloc req TotAlloc TotReq MaxAlloc MaxReq MaxWaste -------- ----- ----- --- -------- ------ -------- ------ -------- rb_allocate_cpu_buffer 768 2304 1536 2304 1536 2304 1536 768 alloc_pipe_info 400 1152 752 1152 752 1152 752 400 instance_mkdir 252 544 292 544 292 544 292 252 __d_alloc 215 1086560 1086345 1087208 1086993 1086560 1086345 215 get_empty_filp 72 2304 2232 4864 4712 4864 4712 152 mm_alloc 40 960 920 960 920 960 920 40 prepare_creds 32 192 160 1728 1440 1728 1440 288 tracing_buffers_open 8 32 24 32 24 32 24 8 do_brk 0 0 0 368 368 368 368 0 journal_add_journal_head 0 6048 6048 6048 6048 6048 6048 0 journal_start 0 0 0 1224 1224 48 48 0 __rb_allocate_pages 0 3289856 3289856 3289856 3289856 3289856 3289856 0 anon_vma_alloc 0 0 0 936 936 864 864 0 [...] OPTIONS ------- *-i* 'input-file':: By default, trace-cmd mem will read the file 'trace.dat'. But the *-i* option opens up the given 'input-file' instead. Note, the input file may also be specified as the last item on the command line. SEE ALSO -------- trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1), trace-cmd-stop(1), trace-cmd-extract(1), trace-cmd-reset(1), trace-cmd-hist(1), trace-cmd-split(1), trace-cmd-listen(1) AUTHOR ------ Written by Steven Rostedt, RESOURCES --------- https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ COPYING ------- Copyright \(C) 2013 Red Hat, Inc. Free use of this software is granted under the terms of the GNU Public License (GPL). trace-cmd-v3.3.1/Documentation/trace-cmd/trace-cmd-options.1.txt000066400000000000000000000014241470231550600244470ustar00rootroot00000000000000TRACE-CMD-OPTIONS(1) ==================== NAME ---- trace-cmd-options - list available options from trace-cmd plugins SYNOPSIS -------- *trace-cmd options* DESCRIPTION ----------- The trace-cmd(1) options command will examine all the trace-cmd plugins that are used by *trace-cmd report(1)* and list them.
SEE ALSO -------- trace-cmd(1), trace-cmd-record(1), trace-cmd-start(1), trace-cmd-stop(1), trace-cmd-extract(1), trace-cmd-reset(1), trace-cmd-split(1), trace-cmd-list(1), trace-cmd-listen(1) AUTHOR ------ Written by Steven Rostedt, RESOURCES --------- https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ COPYING ------- Copyright \(C) 2011 Red Hat, Inc. Free use of this software is granted under the terms of the GNU Public License (GPL). trace-cmd-v3.3.1/Documentation/trace-cmd/trace-cmd-profile.1.txt000066400000000000000000000770701470231550600244260ustar00rootroot00000000000000TRACE-CMD-PROFILE(1) ==================== NAME ---- trace-cmd-profile - profile tasks running live SYNOPSIS -------- *trace-cmd profile ['OPTIONS']* ['command'] DESCRIPTION ----------- The trace-cmd(1) profile will start tracing just like trace-cmd-record(1), with the *--profile* option, except that it does not write to a file, but instead, it will read the events as they happen and will update the accounting of the events. When the trace is finished, it will report the results just like trace-cmd-report(1) would do with its *--profile* option. In other words, the profile command does the work of trace-cmd record --profile, and trace-cmd report --profile without having to record the data to disk, in between. The advantage of using the profile command is that the profiling can be done over a long period of time where recording all events would take up too much disk space. This will enable several events as well as the function graph tracer with a depth of one (if the kernel supports it). This is to show where tasks enter and exit the kernel and how long they were in the kernel. To disable calling function graph, use the *-p* option to enable another tracer. To not enable any tracer, use *-p nop*. All timings are currently in nanoseconds. OPTIONS ------- These are the same as trace-cmd-record(1) with the *--profile* option. 
*-p* 'tracer':: Set a tracer plugin to run instead of function graph tracing set to depth of 1. To not run any tracer, use *-p nop*. *-S*:: Only enable the tracer or events specified on the command line. With this option, the function_graph tracer is not enabled, nor are any events (like sched_switch), unless they are specifically specified on the command line (i.e. -p function -e sched_switch -e sched_wakeup). *-G*:: Set interrupt (soft and hard) events as global (associated to CPU instead of tasks). *-o* 'file':: Write the output of the profile to 'file'. This supersedes *--stderr*. *-H* 'event-hooks':: Add custom event matching to connect any two events together. Format is: [<start_system>:]<start_event>,<start_match>[,<start_pid>]/ [<end_system>:]<end_event>,<end_match>[,<flags>] The start_system:start_event (start_system is optional), is the event that starts the timing. start_match is the field in the start event that is to match with the end_match in the end event. start_pid is optional, as matches are attached to the tasks that run the events, if another field should be used to find that task, then it is specified with start_pid. end_system:end_event is the event that ends the timing (end_system is optional). end_match is the field in the end event that will match the start event field start_match. flags are optional and can be the following (case insensitive): p : The two events are pinned to the same CPU (start and end happen on the same CPU always). s : The event should have a stack traced with it (enable stack tracing for the start event). g : The event is global (not associated to a task). start_pid is not applicable with this flag. *--stderr*:: Redirect the output to stderr. The output of the command being executed is not changed. This allows watching the command execute and saving the output of the profile to another file. *--verbose*[='level']:: Set the log level. Supported log levels are "none", "critical", "error", "warning", "info", "debug", "all" or their identifiers "0", "1", "2", "3", "4", "5", "6".
Setting the log level to specific value enables all logs from that and all previous levels. The level will default to "info" if one is not specified. Example: enable all critical, error and warning logs trace-cmd profile --verbose=warning EXAMPLES -------- --- # trace-cmd profile -F sleep 1 [..] task: sleep-1121 Event: sched_switch:R (2) Total: 234559 Avg: 117279 Max: 129886 Min:104673 | + ftrace_raw_event_sched_switch (0xffffffff8109f310) 100% (2) time:234559 max:129886 min:104673 avg:117279 __schedule (0xffffffff816c1e81) preempt_schedule (0xffffffff816c236e) ___preempt_schedule (0xffffffff81351a59) | + unmap_single_vma (0xffffffff81198c05) | 55% (1) time:129886 max:129886 min:0 avg:129886 | stop_one_cpu (0xffffffff8110909a) | sched_exec (0xffffffff810a119b) | do_execveat_common.isra.31 (0xffffffff811de528) | do_execve (0xffffffff811dea8c) | SyS_execve (0xffffffff811ded1e) | return_to_handler (0xffffffff816c8458) | stub_execve (0xffffffff816c6929) | + unmap_single_vma (0xffffffff81198c05) 45% (1) time:104673 max:104673 min:0 avg:104673 unmap_vmas (0xffffffff81199174) exit_mmap (0xffffffff811a1f5b) mmput (0xffffffff8107699a) flush_old_exec (0xffffffff811ddb75) load_elf_binary (0xffffffff812287df) search_binary_handler (0xffffffff811dd3e0) do_execveat_common.isra.31 (0xffffffff811de8bd) do_execve (0xffffffff811dea8c) SyS_execve (0xffffffff811ded1e) return_to_handler (0xffffffff816c8458) stub_execve (0xffffffff816c6929) Event: sched_switch:S (1) Total: 1000513242 Avg: 1000513242 Max: 1000513242 Min:1000513242 | + ftrace_raw_event_sched_switch (0xffffffff8109f310) 100% (1) time:1000513242 max:1000513242 min:0 avg:1000513242 __schedule (0xffffffff816c1e81) schedule (0xffffffff816c23b9) do_nanosleep (0xffffffff816c4f1c) hrtimer_nanosleep (0xffffffff810dcd86) SyS_nanosleep (0xffffffff810dcea6) return_to_handler (0xffffffff816c8458) tracesys_phase2 (0xffffffff816c65b0) Event: sched_wakeup:1121 (1) Total: 43405 Avg: 43405 Max: 43405 Min:43405 | + 
ftrace_raw_event_sched_wakeup_template (0xffffffff8109d960) 100% (1) time:43405 max:43405 min:0 avg:43405 ttwu_do_wakeup (0xffffffff810a01a2) ttwu_do_activate.constprop.122 (0xffffffff810a0236) try_to_wake_up (0xffffffff810a3ec3) wake_up_process (0xffffffff810a4057) hrtimer_wakeup (0xffffffff810db772) __run_hrtimer (0xffffffff810dbd91) hrtimer_interrupt (0xffffffff810dc6b7) local_apic_timer_interrupt (0xffffffff810363e7) smp_trace_apic_timer_interrupt (0xffffffff816c8c6a) trace_apic_timer_interrupt (0xffffffff816c725a) finish_task_switch (0xffffffff8109c3a4) __schedule (0xffffffff816c1e01) schedule (0xffffffff816c23b9) ring_buffer_wait (0xffffffff811323a3) wait_on_pipe (0xffffffff81133d93) tracing_buffers_splice_read (0xffffffff811350b0) do_splice_to (0xffffffff8120476f) SyS_splice (0xffffffff81206c1f) tracesys_phase2 (0xffffffff816c65b0) Event: func: sys_nanosleep() (1) Total: 1000598016 Avg: 1000598016 Max: 1000598016 Min:1000598016 Event: func: sys_munmap() (1) Total: 14300 Avg: 14300 Max: 14300 Min:14300 Event: func: sys_arch_prctl() (1) Total: 571 Avg: 571 Max: 571 Min:571 Event: func: sys_mprotect() (4) Total: 14382 Avg: 3595 Max: 7196 Min:2190 Event: func: SyS_read() (1) Total: 2640 Avg: 2640 Max: 2640 Min:2640 Event: func: sys_close() (5) Total: 4001 Avg: 800 Max: 1252 Min:414 Event: func: sys_newfstat() (3) Total: 11684 Avg: 3894 Max: 10206 Min:636 Event: func: SyS_open() (3) Total: 23615 Avg: 7871 Max: 10535 Min:4743 Event: func: sys_access() (1) Total: 5924 Avg: 5924 Max: 5924 Min:5924 Event: func: SyS_mmap() (8) Total: 39153 Avg: 4894 Max: 12354 Min:1518 Event: func: smp_trace_apic_timer_interrupt() (1) Total: 10298 Avg: 10298 Max: 10298 Min:10298 Event: func: SyS_brk() (4) Total: 2407 Avg: 601 Max: 1564 Min:206 Event: func: do_notify_resume() (2) Total: 4095 Avg: 2047 Max: 2521 Min:1574 Event: func: sys_execve() (5) Total: 1625251 Avg: 325050 Max: 1605698 Min:3570 | + ftrace_raw_event_sched_wakeup_template (0xffffffff8109d960) 100% (1) time:1605698 
max:1605698 min:0 avg:1605698 ttwu_do_wakeup (0xffffffff810a01a2) ttwu_do_activate.constprop.122 (0xffffffff810a0236) try_to_wake_up (0xffffffff810a3ec3) wake_up_process (0xffffffff810a4057) cpu_stop_queue_work (0xffffffff81108df8) stop_one_cpu (0xffffffff8110909a) sched_exec (0xffffffff810a119b) do_execveat_common.isra.31 (0xffffffff811de528) do_execve (0xffffffff811dea8c) SyS_execve (0xffffffff811ded1e) return_to_handler (0xffffffff816c8458) stub_execve (0xffffffff816c6929) stub_execve (0xffffffff816c6929) Event: func: syscall_trace_enter_phase2() (38) Total: 21544 Avg: 566 Max: 1066 Min:329 Event: func: syscall_trace_enter_phase1() (38) Total: 9202 Avg: 242 Max: 376 Min:150 Event: func: __do_page_fault() (53) Total: 257672 Avg: 4861 Max: 27745 Min:458 | + ftrace_raw_event_sched_wakeup_template (0xffffffff8109d960) 100% (1) time:27745 max:27745 min:0 avg:27745 ttwu_do_wakeup (0xffffffff810a01a2) ttwu_do_activate.constprop.122 (0xffffffff810a0236) try_to_wake_up (0xffffffff810a3ec3) default_wake_function (0xffffffff810a4002) autoremove_wake_function (0xffffffff810b50fd) __wake_up_common (0xffffffff810b4958) __wake_up (0xffffffff810b4cb8) rb_wake_up_waiters (0xffffffff8112f126) irq_work_run_list (0xffffffff81157d0f) irq_work_run (0xffffffff81157d5e) smp_trace_irq_work_interrupt (0xffffffff810082fc) trace_irq_work_interrupt (0xffffffff816c7aaa) return_to_handler (0xffffffff816c8458) trace_do_page_fault (0xffffffff810478b2) trace_page_fault (0xffffffff816c7dd2) Event: func: syscall_trace_leave() (38) Total: 26145 Avg: 688 Max: 1264 Min:381 Event: func: __sb_end_write() (1) Total: 373 Avg: 373 Max: 373 Min:373 Event: func: fsnotify() (1) Total: 598 Avg: 598 Max: 598 Min:598 Event: func: __fsnotify_parent() (1) Total: 286 Avg: 286 Max: 286 Min:286 Event: func: mutex_unlock() (2) Total: 39636 Avg: 19818 Max: 39413 Min:223 Event: func: smp_trace_irq_work_interrupt() (6) Total: 236459 Avg: 39409 Max: 100671 Min:634 | + ftrace_raw_event_sched_wakeup_template 
(0xffffffff8109d960) 100% (4) time:234348 max:100671 min:38745 avg:58587 ttwu_do_wakeup (0xffffffff810a01a2) ttwu_do_activate.constprop.122 (0xffffffff810a0236) try_to_wake_up (0xffffffff810a3ec3) default_wake_function (0xffffffff810a4002) autoremove_wake_function (0xffffffff810b50fd) __wake_up_common (0xffffffff810b4958) __wake_up (0xffffffff810b4cb8) rb_wake_up_waiters (0xffffffff8112f126) irq_work_run_list (0xffffffff81157d0f) irq_work_run (0xffffffff81157d5e) smp_trace_irq_work_interrupt (0xffffffff810082fc) return_to_handler (0xffffffff816c8458) trace_irq_work_interrupt (0xffffffff816c7aaa) | + ftrace_return_to_handler (0xffffffff81140840) | 84% (3) time:197396 max:100671 min:38745 avg:65798 | return_to_handler (0xffffffff816c846d) | trace_page_fault (0xffffffff816c7dd2) | + ftrace_return_to_handler (0xffffffff81140840) 16% (1) time:36952 max:36952 min:0 avg:36952 ftrace_graph_caller (0xffffffff816c8428) mutex_unlock (0xffffffff816c3f75) rb_simple_write (0xffffffff81133142) vfs_write (0xffffffff811d7727) SyS_write (0xffffffff811d7acf) tracesys_phase2 (0xffffffff816c65b0) Event: sys_enter:35 (1) Total: 1000599765 Avg: 1000599765 Max: 1000599765 Min:1000599765 Event: sys_enter:11 (1) Total: 55025 Avg: 55025 Max: 55025 Min:55025 Event: sys_enter:158 (1) Total: 1584 Avg: 1584 Max: 1584 Min:1584 Event: sys_enter:10 (4) Total: 18359 Avg: 4589 Max: 8764 Min:2933 Event: sys_enter:0 (1) Total: 4223 Avg: 4223 Max: 4223 Min:4223 Event: sys_enter:3 (5) Total: 9948 Avg: 1989 Max: 2606 Min:1203 Event: sys_enter:5 (3) Total: 15530 Avg: 5176 Max: 11840 Min:1405 Event: sys_enter:2 (3) Total: 28002 Avg: 9334 Max: 12035 Min:5656 Event: sys_enter:21 (1) Total: 7814 Avg: 7814 Max: 7814 Min:7814 Event: sys_enter:9 (8) Total: 49583 Avg: 6197 Max: 14137 Min:2362 Event: sys_enter:12 (4) Total: 108493 Avg: 27123 Max: 104079 Min:922 Event: sys_enter:59 (5) Total: 1631608 Avg: 326321 Max: 1607529 Min:4563 Event: page_fault_user:0x398d86b630 (1) Event: page_fault_user:0x398d844de0 (1) 
Event: page_fault_user:0x398d8d9020 (1) Event: page_fault_user:0x1d37008 (1) Event: page_fault_user:0x7f0b89e91074 (1) Event: page_fault_user:0x7f0b89d98ed0 (1) Event: page_fault_user:0x7f0b89ec8950 (1) Event: page_fault_user:0x7f0b89d83644 (1) Event: page_fault_user:0x7f0b89d622a8 (1) Event: page_fault_user:0x7f0b89d5a560 (1) Event: page_fault_user:0x7f0b89d34010 (1) Event: page_fault_user:0x1d36008 (1) Event: page_fault_user:0x398d900510 (1) Event: page_fault_user:0x398dbb3ae8 (1) Event: page_fault_user:0x398d87f490 (1) Event: page_fault_user:0x398d8eb660 (1) Event: page_fault_user:0x398d8bd730 (1) Event: page_fault_user:0x398d9625d9 (1) Event: page_fault_user:0x398d931810 (1) Event: page_fault_user:0x398dbb7114 (1) Event: page_fault_user:0x398d837610 (1) Event: page_fault_user:0x398d89e860 (1) Event: page_fault_user:0x398d8f23b0 (1) Event: page_fault_user:0x398dbb4510 (1) Event: page_fault_user:0x398dbad6f0 (1) Event: page_fault_user:0x398dbb1018 (1) Event: page_fault_user:0x398d977b37 (1) Event: page_fault_user:0x398d92eb60 (1) Event: page_fault_user:0x398d8abff0 (1) Event: page_fault_user:0x398dbb0d30 (1) Event: page_fault_user:0x398dbb6c24 (1) Event: page_fault_user:0x398d821c50 (1) Event: page_fault_user:0x398dbb6c20 (1) Event: page_fault_user:0x398d886350 (1) Event: page_fault_user:0x7f0b90125000 (1) Event: page_fault_user:0x7f0b90124740 (1) Event: page_fault_user:0x7f0b90126000 (1) Event: page_fault_user:0x398d816230 (1) Event: page_fault_user:0x398d8002b8 (1) Event: page_fault_user:0x398dbb0b40 (1) Event: page_fault_user:0x398dbb2880 (1) Event: page_fault_user:0x7f0b90141cc6 (1) Event: page_fault_user:0x7f0b9013b85c (1) Event: page_fault_user:0x7f0b90127000 (1) Event: page_fault_user:0x606e70 (1) Event: page_fault_user:0x7f0b90144010 (1) Event: page_fault_user:0x7fffcb31b038 (1) Event: page_fault_user:0x606da8 (1) Event: page_fault_user:0x400040 (1) Event: page_fault_user:0x398d222218 (1) Event: page_fault_user:0x398d015120 (1) Event: 
page_fault_user:0x398d220ce8 (1) Event: page_fault_user:0x398d220b80 (1) Event: page_fault_user:0x7fffcb2fcff8 (1) Event: page_fault_user:0x398d001590 (1) Event: page_fault_user:0x398d838490 (1) Event: softirq_raise:RCU (3) Total: 252931 Avg: 84310 Max: 243288 Min:4639 Event: softirq_raise:SCHED (2) Total: 241249 Avg: 120624 Max: 239076 Min:2173 | + ftrace_raw_event_sched_wakeup_template (0xffffffff8109d960) 100% (1) time:239076 max:239076 min:0 avg:239076 ttwu_do_wakeup (0xffffffff810a01a2) ttwu_do_activate.constprop.122 (0xffffffff810a0236) try_to_wake_up (0xffffffff810a3ec3) default_wake_function (0xffffffff810a4002) autoremove_wake_function (0xffffffff810b50fd) __wake_up_common (0xffffffff810b4958) __wake_up (0xffffffff810b4cb8) rb_wake_up_waiters (0xffffffff8112f126) irq_work_run_list (0xffffffff81157d0f) irq_work_run (0xffffffff81157d5e) smp_trace_irq_work_interrupt (0xffffffff810082fc) trace_irq_work_interrupt (0xffffffff816c7aaa) irq_exit (0xffffffff8107dd66) smp_trace_apic_timer_interrupt (0xffffffff816c8c7a) trace_apic_timer_interrupt (0xffffffff816c725a) prepare_ftrace_return (0xffffffff8103d4fd) ftrace_graph_caller (0xffffffff816c8428) mem_cgroup_begin_page_stat (0xffffffff811cfd25) page_remove_rmap (0xffffffff811a4fc5) stub_execve (0xffffffff816c6929) unmap_single_vma (0xffffffff81198b1c) unmap_vmas (0xffffffff81199174) exit_mmap (0xffffffff811a1f5b) mmput (0xffffffff8107699a) flush_old_exec (0xffffffff811ddb75) load_elf_binary (0xffffffff812287df) search_binary_handler (0xffffffff811dd3e0) do_execveat_common.isra.31 (0xffffffff811de8bd) do_execve (0xffffffff811dea8c) SyS_execve (0xffffffff811ded1e) return_to_handler (0xffffffff816c8458) Event: softirq_raise:HI (3) Total: 72472 Avg: 24157 Max: 64186 Min:3430 Event: softirq_entry:RCU (2) Total: 3191 Avg: 1595 Max: 1788 Min:1403 | + ftrace_raw_event_sched_wakeup_template (0xffffffff8109d960) 100% (1) time:1788 max:1788 min:0 avg:1788 ttwu_do_wakeup (0xffffffff810a01a2) ttwu_do_activate.constprop.122 
(0xffffffff810a0236) try_to_wake_up (0xffffffff810a3ec3) default_wake_function (0xffffffff810a4002) autoremove_wake_function (0xffffffff810b50fd) __wake_up_common (0xffffffff810b4958) __wake_up (0xffffffff810b4cb8) rb_wake_up_waiters (0xffffffff8112f126) irq_work_run_list (0xffffffff81157d0f) irq_work_run (0xffffffff81157d5e) smp_trace_irq_work_interrupt (0xffffffff810082fc) trace_irq_work_interrupt (0xffffffff816c7aaa) irq_work_queue (0xffffffff81157e95) ring_buffer_unlock_commit (0xffffffff8113039f) __buffer_unlock_commit (0xffffffff811367d5) trace_buffer_unlock_commit (0xffffffff811376a2) ftrace_event_buffer_commit (0xffffffff81146d5f) ftrace_raw_event_sched_process_exec (0xffffffff8109c511) do_execveat_common.isra.31 (0xffffffff811de9a3) do_execve (0xffffffff811dea8c) SyS_execve (0xffffffff811ded1e) return_to_handler (0xffffffff816c8458) stub_execve (0xffffffff816c6929) Event: softirq_entry:SCHED (2) Total: 2289 Avg: 1144 Max: 1350 Min:939 Event: softirq_entry:HI (3) Total: 180146 Avg: 60048 Max: 178969 Min:499 | + ftrace_raw_event_sched_wakeup_template (0xffffffff8109d960) 100% (1) time:178969 max:178969 min:0 avg:178969 ttwu_do_wakeup (0xffffffff810a01a2) ttwu_do_activate.constprop.122 (0xffffffff810a0236) try_to_wake_up (0xffffffff810a3ec3) wake_up_process (0xffffffff810a4057) wake_up_worker (0xffffffff8108de74) insert_work (0xffffffff8108fca6) __queue_work (0xffffffff8108fe12) delayed_work_timer_fn (0xffffffff81090088) call_timer_fn (0xffffffff810d8f89) run_timer_softirq (0xffffffff810da8a1) __do_softirq (0xffffffff8107d8fa) irq_exit (0xffffffff8107dd66) smp_trace_apic_timer_interrupt (0xffffffff816c8c7a) trace_apic_timer_interrupt (0xffffffff816c725a) prepare_ftrace_return (0xffffffff8103d4fd) ftrace_graph_caller (0xffffffff816c8428) mem_cgroup_begin_page_stat (0xffffffff811cfd25) page_remove_rmap (0xffffffff811a4fc5) stub_execve (0xffffffff816c6929) unmap_single_vma (0xffffffff81198b1c) unmap_vmas (0xffffffff81199174) exit_mmap (0xffffffff811a1f5b) mmput 
(0xffffffff8107699a) flush_old_exec (0xffffffff811ddb75) load_elf_binary (0xffffffff812287df) search_binary_handler (0xffffffff811dd3e0) do_execveat_common.isra.31 (0xffffffff811de8bd) do_execve (0xffffffff811dea8c) SyS_execve (0xffffffff811ded1e) return_to_handler (0xffffffff816c8458) --- The above uses *-F* to follow the sleep task. It filters only on events that pertain to sleep. Note, in order to follow forks, you need to also include the *-c* flag. Other tasks will appear in the profile as well if events reference more than one task (like sched_switch and sched_wakeup do: the "prev_pid" and "next_pid" of sched_switch, and the "common_pid" and "pid" of sched_wakeup). Stack traces are attached to events that are related to them. Taking a look at the above output: Event: sched_switch:R (2) Total: 234559 Avg: 117279 Max: 129886 Min:104673 This shows that the task was preempted (it's in the running 'R' state). It was preempted twice '(2)' for a total of 234,559 nanoseconds, with an average preempt time of 117,279 ns, a maximum of 129,886 ns and a minimum of 104,673 ns.
The tree shows where it was preempted: | + ftrace_raw_event_sched_switch (0xffffffff8109f310) 100% (2) time:234559 max:129886 min:104673 avg:117279 __schedule (0xffffffff816c1e81) preempt_schedule (0xffffffff816c236e) ___preempt_schedule (0xffffffff81351a59) | + unmap_single_vma (0xffffffff81198c05) | 55% (1) time:129886 max:129886 min:0 avg:129886 | stop_one_cpu (0xffffffff8110909a) | sched_exec (0xffffffff810a119b) | do_execveat_common.isra.31 (0xffffffff811de528) | do_execve (0xffffffff811dea8c) | SyS_execve (0xffffffff811ded1e) | return_to_handler (0xffffffff816c8458) | stub_execve (0xffffffff816c6929) | + unmap_single_vma (0xffffffff81198c05) 45% (1) time:104673 max:104673 min:0 avg:104673 unmap_vmas (0xffffffff81199174) exit_mmap (0xffffffff811a1f5b) mmput (0xffffffff8107699a) flush_old_exec (0xffffffff811ddb75) load_elf_binary (0xffffffff812287df) search_binary_handler (0xffffffff811dd3e0) do_execveat_common.isra.31 (0xffffffff811de8bd) do_execve (0xffffffff811dea8c) SyS_execve (0xffffffff811ded1e) return_to_handler (0xffffffff816c8458) stub_execve (0xffffffff816c6929) Event: sched_switch:S (1) Total: 1000513242 Avg: 1000513242 Max: 1000513242 Min:1000513242 This shows that the task was scheduled out in the INTERRUPTIBLE state once for a total of 1,000,513,242 ns (~1s), which makes sense as the task was a "sleep 1". After the schedule events, the function events are shown. By default the profiler will use the function graph tracer if the depth setting is supported by the kernel. It will set the depth to one which will only trace the first function that enters the kernel. It will also record the amount of time it was in the kernel.
Event: func: sys_nanosleep() (1) Total: 1000598016 Avg: 1000598016 Max: 1000598016 Min:1000598016 Event: func: sys_munmap() (1) Total: 14300 Avg: 14300 Max: 14300 Min:14300 Event: func: sys_arch_prctl() (1) Total: 571 Avg: 571 Max: 571 Min:571 Event: func: sys_mprotect() (4) Total: 14382 Avg: 3595 Max: 7196 Min:2190 Event: func: SyS_read() (1) Total: 2640 Avg: 2640 Max: 2640 Min:2640 Event: func: sys_close() (5) Total: 4001 Avg: 800 Max: 1252 Min:414 Event: func: sys_newfstat() (3) Total: 11684 Avg: 3894 Max: 10206 Min:636 Event: func: SyS_open() (3) Total: 23615 Avg: 7871 Max: 10535 Min:4743 Event: func: sys_access() (1) Total: 5924 Avg: 5924 Max: 5924 Min:5924 Event: func: SyS_mmap() (8) Total: 39153 Avg: 4894 Max: 12354 Min:1518 Event: func: smp_trace_apic_timer_interrupt() (1) Total: 10298 Avg: 10298 Max: 10298 Min:10298 Event: func: SyS_brk() (4) Total: 2407 Avg: 601 Max: 1564 Min:206 Event: func: do_notify_resume() (2) Total: 4095 Avg: 2047 Max: 2521 Min:1574 Event: func: sys_execve() (5) Total: 1625251 Avg: 325050 Max: 1605698 Min:3570 Count of times the event was hit is always in parenthesis '(5)'. The function graph trace may produce too much overhead as it is still triggering (just not tracing) on all functions. To limit functions just to system calls (not interrupts), add the following option: -l 'sys_*' -l 'SyS_*' To disable function graph tracing totally, use: -p nop To use function tracing instead (note, this will not record timings, but just the count of times a function is hit): -p function Following the functions are the events that are recorded. 
Event: sys_enter:35 (1) Total: 1000599765 Avg: 1000599765 Max: 1000599765 Min:1000599765 Event: sys_enter:11 (1) Total: 55025 Avg: 55025 Max: 55025 Min:55025 Event: sys_enter:158 (1) Total: 1584 Avg: 1584 Max: 1584 Min:1584 Event: sys_enter:10 (4) Total: 18359 Avg: 4589 Max: 8764 Min:2933 Event: sys_enter:0 (1) Total: 4223 Avg: 4223 Max: 4223 Min:4223 Event: sys_enter:3 (5) Total: 9948 Avg: 1989 Max: 2606 Min:1203 Event: sys_enter:5 (3) Total: 15530 Avg: 5176 Max: 11840 Min:1405 Event: sys_enter:2 (3) Total: 28002 Avg: 9334 Max: 12035 Min:5656 Event: sys_enter:21 (1) Total: 7814 Avg: 7814 Max: 7814 Min:7814 Event: sys_enter:9 (8) Total: 49583 Avg: 6197 Max: 14137 Min:2362 Event: sys_enter:12 (4) Total: 108493 Avg: 27123 Max: 104079 Min:922 Event: sys_enter:59 (5) Total: 1631608 Avg: 326321 Max: 1607529 Min:4563 These are the raw system call events, with the raw system call ID after the "sys_enter:" For example, "59" is execve(2). Why did it execute 5 times? Looking at a strace of this run, we can see: execve("/usr/lib64/ccache/sleep", ["sleep", "1"], [/* 27 vars */] <... execve resumed> ) = -1 ENOENT (No such file or directory) execve("/usr/local/sbin/sleep", ["sleep", "1"], [/* 27 vars */] <... execve resumed> ) = -1 ENOENT (No such file or directory) execve("/usr/local/bin/sleep", ["sleep", "1"], [/* 27 vars */] <... execve resumed> ) = -1 ENOENT (No such file or directory) execve("/usr/sbin/sleep", ["sleep", "1"], [/* 27 vars */] <... execve resumed> ) = -1 ENOENT (No such file or directory) execve("/usr/bin/sleep", ["sleep", "1"], [/* 27 vars */] <... execve resumed> ) = 0 It attempted to execve the "sleep" command for each path in $PATH until it found one. The page_fault_user events show what userspace address took a page fault. 
Event: softirq_raise:RCU (3) Total: 252931 Avg: 84310 Max: 243288 Min:4639 Event: softirq_raise:SCHED (2) Total: 241249 Avg: 120624 Max: 239076 Min:2173 | + ftrace_raw_event_sched_wakeup_template (0xffffffff8109d960) 100% (1) time:239076 max:239076 min:0 avg:239076 ttwu_do_wakeup (0xffffffff810a01a2) ttwu_do_activate.constprop.122 (0xffffffff810a0236) try_to_wake_up (0xffffffff810a3ec3) default_wake_function (0xffffffff810a4002) autoremove_wake_function (0xffffffff810b50fd) __wake_up_common (0xffffffff810b4958) __wake_up (0xffffffff810b4cb8) rb_wake_up_waiters (0xffffffff8112f126) irq_work_run_list (0xffffffff81157d0f) irq_work_run (0xffffffff81157d5e) smp_trace_irq_work_interrupt (0xffffffff810082fc) trace_irq_work_interrupt (0xffffffff816c7aaa) irq_exit (0xffffffff8107dd66) The timings for the softirq_raise events measure the time it took from the raised softirq to the time it executed. The timings for the softirq_entry events measure the time the softirq took to execute. The stack traces for the softirqs (and possibly other events) are used when an event has a stack attached to it. This can happen if the profile ran more stacks than just the sched events, or when events are dropped and stacks To have full control of what gets traced, use the *-S* option that will have trace-cmd not enable any events or the function_graph tracer. Only the events listed on the command line are shown. If only the time of kmalloc is needed to be seen, and where it was recorded, using the *-S* option and enabling function_graph and stack tracing for just the function needed will give the profile of only that function. 
--- # trace-cmd profile -S -p function_graph -l '*kmalloc*' -l '*kmalloc*:stacktrace' sleep 1 task: sshd-11786 Event: func: __kmalloc_reserve.isra.59() (2) Total: 149684 Avg: 74842 Max: 75598 Min:74086 | + __alloc_skb (0xffffffff815a8917) | 67% (2) time:149684 max:75598 min:74086 avg:74842 | __kmalloc_node_track_caller (0xffffffff811c6635) | __kmalloc_reserve.isra.59 (0xffffffff815a84ac) | return_to_handler (0xffffffff816c8458) | sk_stream_alloc_skb (0xffffffff81604ea1) | tcp_sendmsg (0xffffffff8160592c) | inet_sendmsg (0xffffffff8162fed1) | sock_aio_write (0xffffffff8159f9fc) | do_sync_write (0xffffffff811d694a) | vfs_write (0xffffffff811d7825) | SyS_write (0xffffffff811d7adf) | system_call_fastpath (0xffffffff816c63d2) | + __alloc_skb (0xffffffff815a8917) 33% (1) time:74086 max:74086 min:74086 avg:74086 __alloc_skb (0xffffffff815a8917) sk_stream_alloc_skb (0xffffffff81604ea1) tcp_sendmsg (0xffffffff8160592c) inet_sendmsg (0xffffffff8162fed1) sock_aio_write (0xffffffff8159f9fc) do_sync_write (0xffffffff811d694a) vfs_write (0xffffffff811d7825) SyS_write (0xffffffff811d7adf) system_call_fastpath (0xffffffff816c63d2) [..] --- To watch the command run but save the output of the profile to a file, use --stderr and redirect stderr to a file: # trace-cmd profile --stderr cyclictest -p 80 -n -t1 2> profile.out Or simply use *-o*: # trace-cmd profile -o profile.out cyclictest -p 80 -n -t1 SEE ALSO -------- trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1), trace-cmd-stop(1), trace-cmd-reset(1), trace-cmd-split(1), trace-cmd-list(1), trace-cmd-listen(1) AUTHOR ------ Written by Steven Rostedt, <rostedt@goodmis.org> RESOURCES --------- https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ COPYING ------- Copyright \(C) 2014 Red Hat, Inc. Free use of this software is granted under the terms of the GNU Public License (GPL). 
trace-cmd-v3.3.1/Documentation/trace-cmd/trace-cmd-record.1.txt000066400000000000000000000661141470231550600242410ustar00rootroot00000000000000TRACE-CMD-RECORD(1) =================== NAME ---- trace-cmd-record - record a trace from the Ftrace Linux internal tracer SYNOPSIS -------- *trace-cmd record* ['OPTIONS'] ['command'] DESCRIPTION ----------- The trace-cmd(1) record command will set up the Ftrace Linux kernel tracer to record the specified plugins or events that happen while the 'command' executes. If no command is given, then it will record until the user hits Ctrl-C. The record command of trace-cmd will set up the Ftrace tracer to start tracing the various events or plugins that are given on the command line. It will then create a number of tracing processes (one per CPU) that will start recording from the kernel ring buffer straight into temporary files. When the command is complete (or Ctrl-C is hit) all the files will be combined into a trace.dat file that can later be read (see trace-cmd-report(1)). OPTIONS ------- *-p* 'tracer':: Specify a tracer. Tracers usually do more than just trace an event. Common tracers are: *function*, *function_graph*, *preemptirqsoff*, *irqsoff*, *preemptoff* and *wakeup*. A tracer must be supported by the running kernel. To see a list of available tracers, see trace-cmd-list(1). *-e* 'event':: Specify an event to trace. Various static trace points have been added to the Linux kernel. They are grouped by subsystem where you can enable all events of a given subsystem or specify specific events to be enabled. The 'event' is of the format "subsystem:event-name". You can also just specify the subsystem without the ':event-name' or the event-name without the "subsystem:". Using "-e sched_switch" will enable the "sched_switch" event where as, "-e sched" will enable all events under the "sched" subsystem. The 'event' can also contain glob expressions. 
That is, "*stat*" will select all events (or subsystems) that have the characters "stat" in their names. The keyword 'all' can be used to enable all events. *-a*:: Every event that is being recorded has its output format file saved in the output file to be able to display it later. But if other events are enabled in the trace without trace-cmd's knowledge, the formats of those events will not be recorded and trace-cmd report will not be able to display them. If this is the case, then specify the *-a* option and the format for all events in the system will be saved. *-T*:: Enable a stacktrace on each event. For example: -0 [003] 58549.289091: sched_switch: kworker/0:1:0 [120] R ==> trace-cmd:2603 [120] -0 [003] 58549.289092: kernel_stack: => schedule (ffffffff814b260e) => cpu_idle (ffffffff8100a38c) => start_secondary (ffffffff814ab828) *--func-stack*:: Enable a stack trace on all functions. Note this is only applicable for the "function" plugin tracer, and will only take effect if the -l option is used and succeeds in limiting functions. If the function tracer is not filtered, and the stack trace is enabled, you can live lock the machine. *-f* 'filter':: Specify a filter for the previous event. This must come after a *-e*. This will filter what events get recorded based on the content of the event. Filtering is passed to the kernel directly so what filtering is allowed may depend on what version of the kernel you have. Basically, it will let you use C notation to check if an event should be processed or not. [source,bison] ---- ==, >=, <=, >, <, &, |, && and || ---- The above are usually safe to use to compare fields. *--no-filter*:: Do not filter out the trace-cmd threads. By default, the threads are filtered out to not be traced by events. This option will have the trace-cmd threads also be traced. *-R* 'trigger':: Specify a trigger for the previous event. This must come after a *-e*. This will add a given trigger to the given event. 
To only enable the trigger and not the event itself, place the event after the *-v* option. See Documentation/trace/events.txt in the Linux kernel source for more information on triggers. *-v*:: This will cause all events specified after it on the command line to not be traced. This is useful for selecting a subsystem to be traced but to leave out various events. For Example: "-e sched -v -e "\*stat\*"" will enable all events in the sched subsystem except those that have "stat" in their names. Note: the *-v* option was taken from the way grep(1) inverts the following matches. *-F*:: This will filter only the executable that is given on the command line. If no command is given, then it will filter itself (pretty pointless). Using *-F* will let you trace only events that are caused by the given command. *-P* 'pid':: Similar to *-F* but lets you specify a process ID to trace. *-c*:: Used with either *-F* (or *-P* if the kernel supports it) to trace the process' children too. *--user*:: Execute the specified *command* as the given user. *-C* 'clock':: Set the trace clock to "clock". Use trace-cmd(1) list -C to see what clocks are available. *-o* 'output-file':: By default, trace-cmd record will create a 'trace.dat' file. You can specify a different file to write to with the *-o* option. *-l* 'function-name':: This will limit the 'function' and 'function_graph' tracers to only trace the given function name. More than one *-l* may be specified on the command line to trace more than one function. This supports both full regex(3) parsing, or basic glob parsing. If the filter has only alphanumeric, '_', '*', '?' and '.' characters, then it will be parsed as a basic glob. To force it to be a regex, prefix the filter with '^' or append it with '$' and it will then be parsed as a regex. *-g* 'function-name':: This option is for the function_graph plugin. It will graph the given function. That is, it will only trace the function and all functions that it calls. 
You can have more than one *-g* on the command line. *-n* 'function-name':: This has the opposite effect of *-l*. The function given with the *-n* option will not be traced. This takes precedence, that is, if you include the same function for both *-n* and *-l*, it will not be traced. *-d*:: Some tracer plugins enable the function tracer by default, like the latency tracers. This option prevents the function tracer from being enabled at start up. *-D*:: The option *-d* will try to use the function-trace option to disable the function tracer (if available), otherwise it defaults to the proc file: /proc/sys/kernel/ftrace_enabled, but will not touch it if the function-trace option is available. The *-D* option will disable both the ftrace_enabled proc file as well as the function-trace option if it exists. Note, this disables function tracing for all users, which includes users outside of ftrace tracers (stack_tracer, perf, etc). *-O* 'option':: Ftrace has various options that can be enabled or disabled. This allows you to set them. Prepending the text 'no' to an option disables it. For example: "-O nograph-time" will disable the "graph-time" Ftrace option. *-s* 'interval':: The processes that trace-cmd creates to record from the ring buffer need to wake up to do the recording. Setting the 'interval' to zero will cause the processes to wake up every time new data is written into the buffer. But since Ftrace is recording kernel activity, the act of these processes going back to sleep may cause new events to be written into the ring buffer, which will wake the processes back up. This will needlessly add extra data into the ring buffer. The 'interval' metric is microseconds. The default is set to 1000 (1 ms). This is the time each recording process will sleep before waking up to record any new data that was written to the ring buffer. *-r* 'priority':: The priority to run the capture threads at. In a busy system the trace capturing threads may be starved and events can be lost. 
This increases the priority of those threads to the real time (FIFO) priority. But use this option with care, as it can also change the behaviour of the system being traced. *-b* 'size':: This sets the ring buffer size to 'size' kilobytes. Because the Ftrace ring buffer is per CPU, this size is the size of each per CPU ring buffer inside the kernel. Using "-b 10000" on a machine with 4 CPUs will make Ftrace have a total buffer size of 40 Megs. *--subbuf-size*:: The Linux kernel tracing ring buffer is broken up into sub-buffers. These sub-buffers are typically the size of the architecture "page-size" (4096 on x86). An event can only be as big as the data portion of a sub-buffer, but in most cases that's not an issue. But the time the writer takes to switch from one sub-buffer to the next has a bit more overhead than adding events within the sub-buffer. By increasing its size, it will allow bigger events (although that is seldom an issue) but also speed up the tracing itself. The downside of larger sub-buffers is that a "read" of the ring buffer will pull the sub-buffer size out of the ring buffer and replace it with a new sub-buffer. This may not have any real impact, but it may change the behavior slightly. Or it may not! *-B* 'buffer-name':: If the kernel supports multiple buffers, this will add a buffer with the given name. If the buffer name already exists, that buffer is just reset and will not be deleted at the end of record execution. If the buffer is created, it will be removed at the end of execution (unless the *-k* is set, or 'start' command was used). After a buffer name is stated, all events added after that will be associated with that buffer. If no buffer is specified, or an event is specified before a buffer name, it will be associated with the main (toplevel) buffer. trace-cmd record -e sched -B block -e block -B time -e timer sleep 1 The above will enable all sched events in the main buffer. 
It will then create a 'block' buffer instance and enable all block events within that buffer. A 'time' buffer instance is created and all timer events will be enabled for that buffer. *-m* 'size':: The max size in kilobytes that a per cpu buffer should be. Note, due to rounding to page size, the number may not be totally correct. Also, this is performed by switching between two buffers that are half the given size thus the output may not be of the given size even if much more was written. Use this to prevent running out of disk space for long runs. *-M* 'cpumask':: Set the cpumask to trace. It only affects the last buffer instance given. If supplied before any buffer instance, then it affects the main buffer. The value supplied must be a hex number. trace-cmd record -p function -M c -B events13 -e all -M 5 If the -M is left out, then the mask stays the same. To enable all CPUs, pass in a value of '-1'. *-k*:: By default, when trace-cmd is finished tracing, it will reset the buffers and disable all the tracing that it enabled. This option keeps trace-cmd from disabling the tracer and resetting the buffer. This option is useful for debugging trace-cmd. Note: usually trace-cmd will set the "tracing_on" file back to what it was before it was called. This option will leave that file set to zero. *-i*:: By default, if an event is listed that trace-cmd does not find, it will exit with an error. This option will just ignore events that are listed on the command line but are not found on the system. *-N* 'host:port':: If another machine is running "trace-cmd listen", this option is used to have the data sent to that machine with UDP packets. Instead of writing to an output file, the data is sent off to a remote box. This is ideal for embedded machines with little storage, or having a single machine that will keep all the data in a single repository. 
Note: This option is not supported with latency tracer plugins: wakeup, wakeup_rt, irqsoff, preemptoff and preemptirqsoff. *-V* 'cid:port':: If recording on a guest VM and the host is running *trace-cmd listen* with the *-V* option as well, or if this is recording on the host, and a guest is running *trace-cmd listen* with the *-V* option, then connect to the listener (the same as connecting with the *-N* option via the network). This has the same limitations as the *-N* option above with respect to latency tracer plugins. *-t*:: This option is used with *-N*, when there's a need to send the live data with TCP packets instead of UDP. Although TCP is not nearly as fast as sending UDP packets, it may be needed if the network is not that reliable, the amount of data is not that intensive, and a guarantee is needed that all traced information is transferred successfully. *-q* | *--quiet*:: For use with recording an application. Suppresses normal output (except for errors) to allow only the application's output to be displayed. *--date*:: With the *--date* option, "trace-cmd" will write timestamps into the trace buffer after it has finished recording. It will then map the timestamp to gettimeofday which will allow wall time output from the timestamps reading the created 'trace.dat' file. *--max-graph-depth* 'depth':: Set the maximum depth the function_graph tracer will trace into a function. A value of one will only show where userspace enters the kernel but not any functions called in the kernel. The default is zero, which means no limit. *--cmdlines-size* 'size':: Set the number of entries the kernel tracing file "saved_cmdlines" can contain. This file is a circular buffer which stores the mapping between cmdlines and PIDs. If full, it leads to unresolved cmdlines ("<...>") within the trace. The kernel default value is 128. *--module* 'module':: Filter a module's name in function tracing. 
It is equivalent to adding ':mod:module' after all other functions being filtered. If no other function filter is listed, then all of the module's functions will be filtered in the filter. '--module snd' is equivalent to '-l :mod:snd' '--module snd -l "*jack*"' is equivalent to '-l "*jack*:mod:snd"' '--module snd -n "*"' is equivalent to '-n :mod:snd' *--proc-map*:: Save the traced process address map into the trace.dat file. The traced processes can be specified using the option *-P*, or as a given 'command'. *--profile*:: With the *--profile* option, "trace-cmd" will enable tracing that can be used with trace-cmd-report(1) --profile option. If a tracer *-p* is not set, and function graph depth is supported by the kernel, then the function_graph tracer will be enabled with a depth of one (only show where userspace enters into the kernel). It will also enable various tracepoints with stack tracing such that the report can show where tasks have been blocked for the longest time. See trace-cmd-profile(1) for more details and examples. *-G*:: Set interrupt (soft and hard) events as global (associated to CPU instead of tasks). Only works for --profile. *-H* 'event-hooks':: Add custom event matching to connect any two events together. When not used with *--profile*, it will save the parameter and this will be used by trace-cmd report --profile, too. That is: trace-cmd record -H hrtimer_expire_entry,hrtimer/hrtimer_expire_exit,hrtimer,sp trace-cmd report --profile Will profile hrtimer_expire_entry and hrtimer_expire_exit times. See trace-cmd-profile(1) for format. *-S*:: (for --profile only) Only enable the tracer or events specified on the command line. With this option, the function_graph tracer is not enabled, nor are any events (like sched_switch), unless they are specifically specified on the command line (i.e. 
-p function -e sched_switch -e sched_wakeup) *--temp* 'directory':: When *trace-cmd* is recording the trace, it records the per CPU data into a separate file for each CPU. At the end of the trace, these files are concatenated onto the final trace.dat file. If the final file is on a network file system, it may not be appropriate to copy these temp files into the same location. *--temp* can be used to tell *trace-cmd* where those temp files should be created. *--ts-offset offset*:: Add an offset for the timestamp in the trace.dat file. This will add a offset option into the trace.dat file such that a trace-cmd report will offset all the timestamps of the events by the given offset. The offset is in raw units. That is, if the event timestamps are in nanoseconds the offset will also be in nanoseconds even if the displayed units are in microseconds. *--tsync-interval*:: Set the loop interval, in ms, for timestamps synchronization with guests: If a negative number is specified, timestamps synchronization is disabled If 0 is specified, no loop is performed - timestamps offset is calculated only twice, at the beginning and at the end of the trace. Timestamps synchronization with guests works only if there is support for VSOCK. *--tsc2nsec*:: Convert the current clock to nanoseconds, using tsc multiplier and shift from the Linux kernel's perf interface. This option does not change the trace clock, just assumes that the tsc multiplier and shift are applicable for the selected clock. You may use the "-C tsc2nsec" clock, if not sure what clock to select. *--stderr*:: Have output go to stderr instead of stdout, but the output of the command executed will not be changed. This is useful if you want to monitor the output of the command being executed, but not see the output from trace-cmd. *--poll*:: Waiting for data to be available on the trace ring-buffers may trigger IPIs. This might generate unacceptable trace noise when tracing low latency or real time systems. 
The poll option forces trace-cmd to use O_NONBLOCK. Traces are extracted by busy waiting, which will hog the CPUs, so only use when really needed. *--name*:: Give a specific name for the current agent being processed. Used after *-A* to give the guest being traced a name. Useful when using the vsocket ID instead of a name of the guest. *--verbose*[='level']:: Set the log level. Supported log levels are "none", "critical", "error", "warning", "info", "debug", "all" or their identifiers "0", "1", "2", "3", "4", "5", "6". Setting the log level to specific value enables all logs from that and all previous levels. The level will default to "info" if one is not specified. Example: enable all critical, error and warning logs trace-cmd record --verbose=warning *--file-version*:: Desired version of the output file. Supported versions are 6 or 7. *--compression*:: Compression of the trace output file, one of these strings can be passed: 'any' - auto select the best available compression algorithm 'none' - do not compress the trace file 'name' - the name of the desired compression algorithms. Available algorithms can be listed with trace-cmd list -c *--proxy* 'vsocket':: Use a vsocket proxy to reach the agent. Acts the same as *-A* (for an agent) but will send the proxy connection to the agent. It is expected to run on a privileged guest that the host is aware of (as denoted by the 'cid' in the *-P* option for the agent). *--daemonize* Run trace-cmd in the background as a daemon after recording has started. Creates a pidfile at /var/run/trace-cmd-record.pid with the pid of trace-cmd during the recording. 
EXAMPLES -------- The basic way to trace all events: [source,shell] ---- # trace-cmd record -e all ls > /dev/null # trace-cmd report trace-cmd-13541 [003] 106260.693809: filemap_fault: address=0x128122 offset=0xce trace-cmd-13543 [001] 106260.693809: kmalloc: call_site=81128dd4 ptr=0xffff88003dd83800 bytes_req=768 bytes_alloc=1024 gfp_flags=GFP_KERNEL|GFP_ZERO ls-13545 [002] 106260.693809: kfree: call_site=810a7abb ptr=0x0 ls-13545 [002] 106260.693818: sys_exit_write: 0x1 ---- To use the function tracer with sched switch tracing: [source,shell] ---- # trace-cmd record -p function -e sched_switch ls > /dev/null # trace-cmd report ls-13587 [002] 106467.860310: function: hrtick_start_fair <-- pick_next_task_fair ls-13587 [002] 106467.860313: sched_switch: prev_comm=trace-cmd prev_pid=13587 prev_prio=120 prev_state=R ==> next_comm=trace-cmd next_pid=13583 next_prio=120 trace-cmd-13585 [001] 106467.860314: function: native_set_pte_at <-- __do_fault trace-cmd-13586 [003] 106467.860314: function: up_read <-- do_page_fault ls-13587 [002] 106467.860317: function: __phys_addr <-- schedule trace-cmd-13585 [001] 106467.860318: function: _raw_spin_unlock <-- __do_fault ls-13587 [002] 106467.860320: function: native_load_sp0 <-- __switch_to trace-cmd-13586 [003] 106467.860322: function: down_read_trylock <-- do_page_fault ---- Here is a nice way to find what interrupts have the highest latency: [source,shell] ---- # trace-cmd record -p function_graph -e irq_handler_entry -l do_IRQ sleep 10 # trace-cmd report -0 [000] 157412.933969: funcgraph_entry: | do_IRQ() { -0 [000] 157412.933974: irq_handler_entry: irq=48 name=eth0 -0 [000] 157412.934004: funcgraph_exit: + 36.358 us | } -0 [000] 157413.895004: funcgraph_entry: | do_IRQ() { -0 [000] 157413.895011: irq_handler_entry: irq=48 name=eth0 -0 [000] 157413.895026: funcgraph_exit: + 24.014 us | } -0 [000] 157415.891762: funcgraph_entry: | do_IRQ() { -0 [000] 157415.891769: irq_handler_entry: irq=48 name=eth0 -0 [000] 157415.891784: 
funcgraph_exit: + 22.928 us | } -0 [000] 157415.934869: funcgraph_entry: | do_IRQ() { -0 [000] 157415.934874: irq_handler_entry: irq=48 name=eth0 -0 [000] 157415.934906: funcgraph_exit: + 37.512 us | } -0 [000] 157417.888373: funcgraph_entry: | do_IRQ() { -0 [000] 157417.888381: irq_handler_entry: irq=48 name=eth0 -0 [000] 157417.888398: funcgraph_exit: + 25.943 us | } ---- An example of the profile: [source,shell] ---- # trace-cmd record --profile sleep 1 # trace-cmd report --profile --comm sleep task: sleep-21611 Event: sched_switch:R (1) Total: 99442 Avg: 99442 Max: 99442 Min:99442 1 total:99442 min:99442 max:99442 avg=99442 => ftrace_raw_event_sched_switch (0xffffffff8105f812) => __schedule (0xffffffff8150810a) => preempt_schedule (0xffffffff8150842e) => ___preempt_schedule (0xffffffff81273354) => cpu_stop_queue_work (0xffffffff810b03c5) => stop_one_cpu (0xffffffff810b063b) => sched_exec (0xffffffff8106136d) => do_execve_common.isra.27 (0xffffffff81148c89) => do_execve (0xffffffff811490b0) => SyS_execve (0xffffffff811492c4) => return_to_handler (0xffffffff8150e3c8) => stub_execve (0xffffffff8150c699) Event: sched_switch:S (1) Total: 1000506680 Avg: 1000506680 Max: 1000506680 Min:1000506680 1 total:1000506680 min:1000506680 max:1000506680 avg=1000506680 => ftrace_raw_event_sched_switch (0xffffffff8105f812) => __schedule (0xffffffff8150810a) => schedule (0xffffffff815084b8) => do_nanosleep (0xffffffff8150b22c) => hrtimer_nanosleep (0xffffffff8108d647) => SyS_nanosleep (0xffffffff8108d72c) => return_to_handler (0xffffffff8150e3c8) => tracesys_phase2 (0xffffffff8150c304) Event: sched_wakeup:21611 (1) Total: 30326 Avg: 30326 Max: 30326 Min:30326 1 total:30326 min:30326 max:30326 avg=30326 => ftrace_raw_event_sched_wakeup_template (0xffffffff8105f653) => ttwu_do_wakeup (0xffffffff810606eb) => ttwu_do_activate.constprop.124 (0xffffffff810607c8) => try_to_wake_up (0xffffffff8106340a) ---- An example of using --daemonize together with guest/host tracing: [source,shell] 
---- $ sudo trace-cmd record --daemonize -p nop -e 'sched:sched_process_exec' -A guest -p nop -e net && > ping -c 1 10.20.1.2 && > sudo start-stop-daemon --stop --signal INT --retry 20 --pidfile /var/run/trace-cmd-record.pid && > sudo trace-cmd report -i trace.dat -i trace-guest.dat | head Negotiated kvm time sync protocol with guest guest Send SIGINT to pid 3071371 to stop recording PING 10.20.1.2 (10.20.1.2) 56(84) bytes of data. 64 bytes from 10.20.1.2: icmp_seq=1 ttl=64 time=0.134 ms --- 10.20.1.2 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 0.134/0.134/0.134/0.000 ms CPU0 data recorded at offset=0x14f000 229 bytes in size (4096 uncompressed) .... trace.dat: cpus=28 trace-guest.dat: cpus=1 trace.dat: ping-3071450 [013] 1196830.834258: sched_process_exec: filename=/bin/ping pid=3071450 old_pid=3071450 trace-guest.dat: -0 [000] 1196830.835990: napi_gro_receive_entry: dev=eth1 napi_id=0x2002 queue_mapping=1 skbaddr=0xffff95d051a5c400 vlan_tagged=0 vlan_proto=0x0000 vlan_tci=0x0000 protocol=0x0800 ip_summed=0 hash=0x00000000 l4_hash=0 len=84 data_len=0 truesize=768 mac_header_valid=1 mac_header=-14 nr_frags=0 gso_size=0 gso_type=0 trace-guest.dat: -0 [000] 1196830.835997: napi_gro_receive_exit: ret=3 trace-guest.dat: -0 [000] 1196830.835998: netif_receive_skb: dev=eth1 skbaddr=0xffff95d051a5c400x len=84 trace-guest.dat: -0 [000] 1196830.836021: net_dev_queue: dev=eth1 skbaddr=0xffff95d051a5c700x len=98 trace-guest.dat: -0 [000] 1196830.836024: net_dev_start_xmit: dev=eth1 queue_mapping=0 skbaddr=0xffff95d051a5c700 vlan_tagged=0 vlan_proto=0x0000 vlan_tci=0x0000 protocol=0x0800 ip_summed=0 len=98 data_len=0 network_offset=14 transport_offset_valid=1 transport_offset=34 tx_flags=0 gso_size=0 gso_segs=0 gso_type=0 trace-guest.dat: -0 [000] 1196830.836069: net_dev_xmit: dev=eth1 skbaddr=0xffff95d051a5c700 len=98 rc=0 trace.dat: sudo-3071451 [015] 1196830.838262: sched_process_exec: filename=/usr/bin/sudo 
pid=3071451 old_pid=3071451 ---- SEE ALSO -------- trace-cmd(1), trace-cmd-report(1), trace-cmd-start(1), trace-cmd-stop(1), trace-cmd-extract(1), trace-cmd-reset(1), trace-cmd-split(1), trace-cmd-list(1), trace-cmd-listen(1), trace-cmd-profile(1) AUTHOR ------ Written by Steven Rostedt, RESOURCES --------- https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ COPYING ------- Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under the terms of the GNU Public License (GPL). trace-cmd-v3.3.1/Documentation/trace-cmd/trace-cmd-report.1.txt000066400000000000000000000763531470231550600243040ustar00rootroot00000000000000TRACE-CMD-REPORT(1) =================== NAME ---- trace-cmd-report - show in ASCII a trace created by trace-cmd record SYNOPSIS -------- *trace-cmd report* ['OPTIONS'] ['input-file' ['input-file' ...]] DESCRIPTION ----------- The trace-cmd(1) report command will output a human readable report of a trace created by trace-cmd record. OPTIONS ------- *-i* 'input-file':: By default, trace-cmd report will read the file 'trace.dat'. But the *-i* option open up the given 'input-file' instead. Note, the input file may also be specified as the last item on the command line. *-e*:: This outputs the endianess of the file. trace-cmd report is smart enough to be able to read big endian files on little endian machines, and vise versa. *-f*:: This outputs the list of all functions that have been mapped in the trace.dat file. Note, this list may contain functions that may not appear in the trace, as it is the list of mappings to translate function addresses into function names. *-P*:: This outputs the list of "trace_printk()" data. The raw trace data points to static pointers in the kernel. This must be stored in the trace.dat file. *-E*:: This lists the possible events in the file (but this list is not necessarily the list of events in the file). *--events*:: This will list the event formats that are stored in the trace.dat file. 
*--event* regex:: This will print events that match the given regex. If a colon is specified, then the characters before the colon will be used to match the system and the characters after the colon will match the event. trace-cmd report --event sys:read The above will only match events where the system name contains "sys" and the event name contains "read". trace-cmd report --event read The above will match all events that contain "read" in their names. Also it may list all events of a system that contains "read" as well. *--check-events*:: This will parse the event format strings that are stored in the trace.dat file and return whether the formats can be parsed correctly. It will load plugins unless *-N* is specified. *-t*:: Print the full timestamp. The timestamps in the data file are usually recorded to the nanosecond. But the default display of the timestamp is only to the microsecond. To see the full timestamp, add the *-t* option. *-F* 'filter':: Add a filter to limit what events are displayed. Filters defined after an input file (specified with *-i*) only apply to that input file. Filters provided before any input file is given are considered global and apply to all input files. The format of the filter is: [source,bison] ---- <events> ':' <filter> <events> = SYSTEM'/'EVENT | SYSTEM | EVENT | <events> ',' <events> <filter> = EVENT_FIELD <op> <value> | <filter> '&&' <filter> | <filter> '||' <filter> | '(' <filter> ')' | '!' <filter> <op> = '==' | '!=' | '>=' | '<=' | '>' | '<' | '&' | '|' | '^' | '+' | '-' | '*' | '/' | '%' <value> = NUM | STRING | EVENT_FIELD ---- SYSTEM is the name of the system to filter on. If the EVENT is left out, then it applies to all events under the SYSTEM. If only one string is used without the '/' to delimit between SYSTEM and EVENT, then the filter will be applied to all systems and events that match the given string. Whitespace is ignored, such that "sched:next_pid==123" is equivalent to "sched : next_pid == 123". STRING is defined with single or double quotes (single quote must end with single quote, and double with double).
Whitespace within quotes is not ignored. The representation of a SYSTEM or EVENT may also be a regular expression as defined by 'regcomp(3)'. The EVENT_FIELD is the name of the field of an event that is being filtered. If the event does not contain the EVENT_FIELD, that part of the equation will be considered false. [source,shell] ---- -F 'sched : bogus == 1 || common_pid == 2' ---- The "bogus == 1" will always evaluate to FALSE because no event has a field called "bogus", but the "common_pid == 2" will still be evaluated since all events have the field "common_pid". Any "sched" event that was traced by the process with the PID of 2 will be shown. Note, the EVENT_FIELD is the field name as shown by an event's format (as displayed with *--events*), and not what is found in the output. If the output shows "ID:foo" but the field that "foo" belongs to was called "name" in the event format, then "name" must be used in the filter. The same is true about values. If the value that is displayed is converted to a string symbol, the filter checks the original value and not the value displayed. For example, to filter on all tasks that were in the running state at a context switch: [source,shell] ---- -F 'sched/sched_switch : prev_state==0' ---- Although the output displays 'R', having 'prev_state=="R"' will not work. Note: You can also specify 'COMM' as an EVENT_FIELD. This will use the task name (or comm) of the record to compare. For example, to filter out all of the "trace-cmd" tasks: [source,shell] ---- -F '.*:COMM != "trace-cmd"' ---- *-I*:: Do not print events where the HARDIRQ latency flag is set. This will filter out most events that are from interrupt context. Note, it may not filter out function traced functions that are in interrupt context but were called before the kernel "in interrupt" flag was set. *-S*:: Do not print events where the SOFTIRQ latency flag is set. This will filter out most events that are from soft interrupt context.
*-v*:: This causes the following filters of *-F* to filter out the matching events. [source,shell] ---- -v -F 'sched/sched_switch : prev_state == 0' ---- Will not display any sched_switch events that have a prev_state of 0. Removing the *-v* will only print out those events. *-T*:: Test the filters of -F. After processing a filter string, the resulting filter will be displayed for each event. This is useful for using a filter for more than one event where a field may not exist in all events. Also it can be used to make sure there are no misspelled event field names, as they will simply be ignored. *-T* is ignored if *-F* is not specified. *-V*:: Show verbose messages (see *--verbose* but only for the numbers) *-L*:: This will not load system wide plugins. It loads "local only". That is what it finds in the ~/.trace-cmd/plugins directory. *-N*:: This will not load any plugins. *-n* 'event-re':: This will cause all events that match the option to ignore any registered handler (by the plugins) to print the event. The normal event will be printed instead. The 'event-re' is a regular expression as defined by 'regcomp(3)'. *--profile*:: With the *--profile* option, "trace-cmd report" will process all the events first, and then output a format showing where tasks have spent their time in the kernel, as well as where they are blocked the most, and where wake up latencies are. See trace-cmd-profile(1) for more details and examples. *-G*:: Set interrupt (soft and hard) events as global (associated to CPU instead of tasks). Only works for --profile. *-H* 'event-hooks':: Add custom event matching to connect any two events together. See trace-cmd-profile(1) for format. *-R*:: This will show the events in "raw" format. That is, it will ignore the event's print formatting and just print the contents of each field. *-r* 'event-re':: This will cause all events that match the option to print its raw fields. The 'event-re' is a regular expression as defined by 'regcomp(3)'. 
*-l*:: This adds a "latency output" format. Information about interrupts being disabled, soft irq being disabled, the "need_resched" flag being set, preempt count, and big kernel lock are all being recorded with every event. But the default display does not show this information. This option will display this information with 6 characters. When one of the fields is zero or N/A a \'.\' is shown. [source,shell] ---- <idle>-0 0d.h1. 106467.859747: function: ktime_get <-- tick_check_idle ---- The 0d.h1. denotes this information. It starts with a number. This represents the CPU number that the event occurred on. The second character is one of the following: 'd' - Interrupts are disabled '.' - Interrupts are enabled 'X' - Has flags that are not yet known by trace-cmd The third character is the "need rescheduling" flag. 'N' - A schedule is set to take place '.' - No scheduling is set The fourth character represents the context the event was in when it triggered 'h' - Hard interrupt context 's' - Soft interrupt context 'H' - Hard interrupt context that interrupted a soft interrupt '.' - Normal context The next is a number (should be less than 10), that represents the preemption depth (the number of times preempt_disable() is called without preempt_enable()). '.' means preemption is enabled. On some systems, "migrate disable" may exist, in which case a number will be shown for that, or '.' meaning migration is enabled. If lockdep is enabled on the system, then the number represents the depth of locks that are held when the event triggered. '.' means no locks are held. *-w*:: If both the 'sched_switch' and 'sched_wakeup' events are enabled, then this option will report the latency between the time the task was first woken, and the time it was scheduled in. *-q*:: Quiet non critical warnings. *-O*:: Pass options to the trace-cmd plugins that are loaded. -O plugin:var=value The 'plugin:' and '=value' are optional. Value may be left off for options that are boolean.
If the 'plugin:' is left off, then any variable that matches in all plugins will be set. Example: -O fgraph:tailprint *--cpu* <cpu list>:: List of CPUs, separated by "," or ":", used for filtering the events. A range of CPUs can be specified using "cpuX-cpuY" notation, where all CPUs in the range between cpuX and cpuY will be included in the list. The order of CPUs in the list must be from lower to greater. Example: "--cpu 0,3" - show events from CPUs 0 and 3 "--cpu 2-4" - show events from CPUs 2, 3 and 4 *--cpus*:: List the CPUs that have data in the trace file then exit. *--first-event*:: Show the timestamp of the first event of all CPUs that have data. *--last-event*:: Show the timestamp of the last event of all CPUs that have data. *--stat*:: If the trace.dat file recorded the final stats (output at the end of record) the *--stat* option can be used to retrieve them. *--uname*:: If the trace.dat file recorded uname during the run, this will retrieve that information. *--version*:: If the trace.dat file recorded the version of the executable used to create it, report that version. *--ts-offset* offset:: Add (or subtract if negative) an offset for all timestamps of the previous data file specified with *-i*. This is useful to merge sort multiple trace.dat files where the difference in the timestamp is known. For example if a trace is done on a virtual guest, and another trace is done on the host. If the host timestamp is 1000 units ahead of the guest, the following can be done: trace-cmd report -i host.dat --ts-offset -1000 -i guest.dat This will subtract 1000 timestamp units from all the host events as it merges with the guest.dat events. Note, the units are the raw units recorded in the trace. If the units are nanoseconds, the addition (or subtraction) from the offset will be nanoseconds even if the displayed units are microseconds. *--ts2secs* HZ:: Convert the current clock source into a second (nanosecond resolution) output.
When using clocks like x86-tsc, if the frequency is known, by passing in the clock frequency, this will convert the time to seconds. This option affects any trace.dat file given with *-i* preceding it. If this option comes before any *-i* option, then that value becomes the default conversion for all other trace.dat files. If another --ts2secs option appears after a *-i* trace.dat file, then that option will override the default value. Example: On a 3.4 GHz machine trace-cmd record -p function -C x86-tsc trace-cmd report --ts2secs 3400000000 The report will convert the cycles timestamps into a readable second display. The default display resolution is microseconds, unless *-t* is used. The value of --ts-offset must still be in the raw timestamp units, even with this option. The offset will be converted as well. *--ts-diff*:: Show the time differences between events. The difference will appear in parentheses just after the timestamp. *--ts-check*:: Make sure no timestamp goes backwards, and if it does, print out a warning message of the fact. *--nodate*:: Ignore converting the timestamps to the date set by *trace-cmd record*(1) --date option. *--raw-ts*:: Display raw timestamps, without any corrections. *--align-ts*:: Display timestamps aligned to the first event. *--verbose*[='level']:: Set the log level. Supported log levels are "none", "crit", "err", "warn", "info", "debug", "all" or their identifiers "0", "1", "2", "3", "4", "5", "6". Setting the log level to a specific value enables all logs from that and all previous levels. The level will default to "info" if one is not specified. Example: enable all critical, error and warning logs trace-cmd report --verbose=warn EXAMPLES -------- Using a trace.dat file that was created with: [source,shell] ---- # trace-cmd record -p function -e all sleep 5 ---- The default report shows: [source,shell] ---- # trace-cmd report cpus=8 sleep-89142 [001] ...1. 162573.215752: function: mutex_unlock sleep-89142 [001] ...1.
162573.215754: function: __mutex_unlock_slowpath sleep-89142 [001] ..... 162573.215755: lock_release: 0xffffffff855e7448 trace_types_lock sleep-89142 [001] ..... 162573.215757: lock_release: 0xffff892a01b54420 sb_writers sleep-89142 [001] ...1. 162573.215757: function: preempt_count_add sleep-89142 [001] ...1. 162573.215758: preempt_disable: caller=vfs_write+0x147 parent=vfs_write+0x147 sleep-89142 [001] ...2. 162573.215758: function: rcu_read_lock_any_held sleep-89142 [001] ...2. 162573.215759: function: rcu_lockdep_current_cpu_online sleep-89142 [001] ...2. 162573.215759: function: preempt_count_sub sleep-89142 [001] ...1. 162573.215760: preempt_enable: caller=vfs_write+0x176 parent=vfs_write+0x176 sleep-89142 [001] ...1. 162573.215761: function: __f_unlock_pos sleep-89142 [001] ...1. 162573.215761: function: mutex_unlock [...] ---- The note on the third column: [source,shell] ---- sleep-89998 [002] ...1. 223087.004011: lock_acquire: 0xffff892b7cf32c20 lock sleep-89998 [002] ...1. 223087.004011: lock_acquire: 0xffffffff85517f00 read rcu_read_lock -0 [005] dNh2. 223087.004012: sched_wakeup: trace-cmd:89992 [120] CPU:005 sleep-89998 [002] ...1. 223087.004012: lock_acquire: 0xffffffff85517f00 read rcu_read_lock sleep-89998 [002] ...1. 223087.004013: lock_release: 0xffffffff85517f00 rcu_read_lock ---- It follows the same as shown in the Linux kernel `/sys/kernel/tracing/trace` file. [source,shell] ---- # cat /sys/kernel/tracing/trace # tracer: nop # # entries-in-buffer/entries-written: 0/0 #P:8 # # _-----=> irqs-off/BH-disabled # / _----=> need-resched # | / _---=> hardirq/softirq # || / _--=> preempt-depth # ||| / _-=> migrate-disable # |||| / delay # TASK-PID CPU# ||||| TIMESTAMP FUNCTION # | | | ||||| | | ---- Is the same as explained in the *-l* option. Where the first position is: '.' 
- means interrupts and bottom halves enabled 'd' - means interrupts and bottom halves are disabled The second position: 'N' - means that the "NEED_RESCHED" flag is set and the kernel should try to schedule as soon as possible. The third position: '.' - In normal/schedulable context 's' - In soft interrupt context 'h' - In hard interrupt context 'H' - in hard interrupt context that interrupted a soft interrupt The forth position is the preempt count depth: 'pass:[.]' - preemption is enabled '#' - the depth of preemption disabled (nested) The fifth column is the migration disabled counter: '.' - migration is enabled '#' - the depth of migration being disabled (nested) To see everything but the function traces: [source,shell] ---- # trace-cmd report -v -F 'function' cpus=8 sleep-89142 [001] ..... 162573.215755: lock_release: 0xffffffff855e7448 trace_types_lock sleep-89142 [001] ..... 162573.215757: lock_release: 0xffff892a01b54420 sb_writers sleep-89142 [001] ...1. 162573.215758: preempt_disable: caller=vfs_write+0x147 parent=vfs_write+0x147 sleep-89142 [001] ...1. 162573.215760: preempt_enable: caller=vfs_write+0x176 parent=vfs_write+0x176 sleep-89142 [001] ..... 162573.215762: lock_release: 0xffff892a19601ac8 &f->f_pos_lock sleep-89142 [001] ..... 162573.215764: sys_exit: NR 1 = 1 sleep-89142 [001] ..... 162573.215766: sys_exit_write: 0x1 sleep-89142 [001] d.... 162573.215767: irq_disable: caller=syscall_exit_to_user_mode+0x15 parent=0x0 sleep-89142 [001] d.... 162573.215768: irq_enable: caller=syscall_exit_to_user_mode+0xed parent=0x0 sleep-89142 [001] ..... 162573.215773: lock_acquire: 0xffff892a4ad29318 read &mm->mmap_lock sleep-89142 [001] ..... 162573.215775: lock_release: 0xffff892a4ad29318 &mm->mmap_lock sleep-89142 [001] ..... 162573.215778: lock_acquire: 0xffff892a4ad29318 read &mm->mmap_lock [...] 
---- To see only the kmalloc calls that were greater than 1000 bytes: [source,shell] ---- # trace-cmd report -F 'kmalloc: bytes_req > 1000' cpus=8 sleep-89142 [001] ..... 162573.219401: kmalloc: (tomoyo_find_next_domain+0x84) call_site=tomoyo_find_next_domain+0x84 ptr=0xffff892a176c0000 bytes_req=4096 bytes_alloc=4096 gfp_flags=0xd40 node=-1 accounted=false sleep-89142 [001] ..... 162573.219511: kmalloc: (tomoyo_realpath_from_path+0x42) call_site=tomoyo_realpath_from_path+0x42 ptr=0xffff892a176c6000 bytes_req=4096 bytes_alloc=4096 gfp_flags=0xc40 node=-1 accounted=false trace-cmd-89135 [000] ..... 162573.244301: kmalloc: (kvmalloc_node_noprof+0x43) call_site=kvmalloc_node_noprof+0x43 ptr=0xffff892a63f84000 bytes_req=8193 bytes_alloc=16384 gfp_flags=0x12dc0 node=-1 accounted=false trace-cmd-89135 [000] ..... 162573.244471: kmalloc: (kvmalloc_node_noprof+0x43) call_site=kvmalloc_node_noprof+0x43 ptr=0xffff892a63f84000 bytes_req=8193 bytes_alloc=16384 gfp_flags=0x12dc0 node=-1 accounted=false trace-cmd-89134 [007] ..... 162573.267148: kmalloc: (kvmalloc_node_noprof+0x43) call_site=kvmalloc_node_noprof+0x43 ptr=0xffff892a628d4000 bytes_req=8193 bytes_alloc=16384 gfp_flags=0x12dc0 node=-1 accounted=false trace-cmd-89134 [007] ..... 162573.267403: kmalloc: (kvmalloc_node_noprof+0x43) call_site=kvmalloc_node_noprof+0x43 ptr=0xffff892a628d4000 bytes_req=8193 bytes_alloc=16384 gfp_flags=0x12dc0 node=-1 accounted=false trace-cmd-89141 [002] ..... 162573.290583: kmalloc: (kvmalloc_node_noprof+0x43) call_site=kvmalloc_node_noprof+0x43 ptr=0xffff892a12d3c000 bytes_req=8193 bytes_alloc=16384 gfp_flags=0x12dc0 node=-1 accounted=false trace-cmd-89141 [002] ..... 162573.290754: kmalloc: (kvmalloc_node_noprof+0x43) call_site=kvmalloc_node_noprof+0x43 ptr=0xffff892a12d3c000 bytes_req=8193 bytes_alloc=16384 gfp_flags=0x12dc0 node=-1 accounted=false trace-cmd-89139 [004] ..... 
162573.784636: kmalloc: (kvmalloc_node_noprof+0x43) call_site=kvmalloc_node_noprof+0x43 ptr=0xffff892a63d70000 bytes_req=8193 bytes_alloc=16384 gfp_flags=0x12dc0 node=-1 accounted=false [...] ---- To see wakeups and sched switches that left the previous task in the running state: [source,shell] ---- # trace-cmd report -F 'sched: prev_state == 0' -F 'sched_waking' cpus=8 sleep-89142 [001] d.h6. 162573.215941: sched_waking: comm=trace-cmd pid=89135 prio=120 target_cpu=000 -0 [000] dNh7. 162573.216219: sched_waking: comm=trace-cmd pid=89134 prio=120 target_cpu=007 -0 [000] d..2. 162573.216423: sched_switch: swapper/0:0 [120] R ==> trace-cmd:89135 [120] -0 [007] dNh7. 162573.216511: sched_waking: comm=trace-cmd pid=89141 prio=120 target_cpu=002 -0 [007] d..2. 162573.216698: sched_switch: swapper/7:0 [120] R ==> trace-cmd:89134 [120] -0 [002] dNh7. 162573.216776: sched_waking: comm=trace-cmd pid=89136 prio=120 target_cpu=001 -0 [002] d..2. 162573.216951: sched_switch: swapper/2:0 [120] R ==> trace-cmd:89141 [120] sleep-89142 [001] d.s3. 162573.231260: sched_waking: comm=rcu_preempt pid=17 prio=120 target_cpu=002 -0 [002] d..2. 162573.231568: sched_switch: swapper/2:0 [120] R ==> rcu_preempt:17 [120] sleep-89142 [001] d.s2. 162573.240425: sched_waking: comm=rcu_preempt pid=17 prio=120 target_cpu=002 -0 [002] d..2. 162573.240719: sched_switch: swapper/2:0 [120] R ==> rcu_preempt:17 [120] sleep-89142 [001] d.h7. 162573.241983: sched_waking: comm=trace-cmd pid=89135 prio=120 target_cpu=000 ---- The above needs a little explanation. The filter specifies the "sched" subsystem, which includes all scheduling events. Any event that does not have the format field "prev_state", will evaluate those expressions as FALSE, and will not produce a match. Only the sched_switch event will match that. The second "-F" will include the sched_waking event. [source,shell] ---- # trace-cmd report -w -F 'sched_switch, sched_wakeup.*' [...] trace-cmd-89141 [007] d..2. 
162583.263060: sched_switch: trace-cmd:89141 [120] R ==> trace-cmd:89135 [120] kworker/u36:1-51219 [000] d..2. 162583.266957: sched_switch: kworker/u36:1:51219 [120] R ==> kworker/u33:2:49692 [120] Latency: 4024.977 usecs trace-cmd-89135 [007] d..2. 162583.267109: sched_switch: trace-cmd:89135 [120] R ==> trace-cmd:89141 [120] trace-cmd-89139 [001] d..2. 162583.267147: sched_switch: trace-cmd:89139 [120] D ==> swapper/1:0 [120] kworker/u36:2-88857 [002] d..2. 162583.267913: sched_switch: kworker/u36:2:88857 [120] R ==> trace-cmd:89136 [120] kworker/u33:2-49692 [000] d..2. 162583.268334: sched_switch: kworker/u33:2:49692 [120] I ==> kworker/u36:1:51219 [120] -0 [001] dNh4. 162583.268747: sched_wakeup: sleep:89142 [120] CPU:001 -0 [001] d..2. 162583.268833: sched_switch: swapper/1:0 [120] R ==> sleep:89142 [120] Latency: 85.751 usecs sleep-89142 [001] d.h4. 162583.269022: sched_wakeup: trace-cmd:89139 [120] CPU:001 trace-cmd-89141 [007] d..2. 162583.271009: sched_switch: trace-cmd:89141 [120] R ==> trace-cmd:89135 [120] trace-cmd-89136 [002] d..2. 162583.271020: sched_switch: trace-cmd:89136 [120] R ==> kworker/u36:2:88857 [120] kworker/u36:2-88857 [002] d..2. 162583.271079: sched_switch: kworker/u36:2:88857 [120] I ==> trace-cmd:89136 [120] trace-cmd-89137 [006] d.h2. 162583.273950: sched_wakeup: trace-cmd:89133 [120] CPU:006 sleep-89142 [001] d..2. 162583.274064: sched_switch: sleep:89142 [120] Z ==> trace-cmd:89139 [120] Latency: 5042.285 usecs trace-cmd-89135 [007] d..2. 162583.275043: sched_switch: trace-cmd:89135 [120] R ==> trace-cmd:89141 [120] trace-cmd-89137 [006] d..2. 162583.275158: sched_switch: trace-cmd:89137 [120] R ==> trace-cmd:89133 [120] Latency: 1207.327 usecs trace-cmd-89136 [002] dNh3. 162583.275229: sched_wakeup: rcu_preempt:17 [120] CPU:002 trace-cmd-89136 [002] d..2. 162583.275294: sched_switch: trace-cmd:89136 [120] R ==> rcu_preempt:17 [120] Latency: 65.255 usecs rcu_preempt-17 [002] d..2. 
162583.275399: sched_switch: rcu_preempt:17 [120] I ==> trace-cmd:89136 [120] Average wakeup latency: 20082.580 usecs Maximum Latency: 1032049.277 usecs at timestamp: 162574.787022 Minimum Latency: 29.023 usecs at timestamp: 162583.189731 RT task timings: Average wakeup latency: 139.568 usecs Maximum Latency: 220.583 usecs at timestamp: 162577.347038 Minimum Latency: 75.902 usecs at timestamp: 162577.719121 ---- The above trace produces the wakeup latencies of the tasks. The "sched_switch" event reports each individual latency after writing the event information. At the end of the report, the average wakeup latency is reported, as well as the maxim and minimum latency and the timestamp they happened at. It does this for both normal tasks as well as real-time tasks. [source,shell] ---- # trace-cmd report -w -F 'sched_switch, sched_wakeup.*: prio < 100 || next_prio < 100' cpus=8 -0 [001] dNh5. 162573.291142: sched_wakeup: migration/1:23 [0] CPU:001 -0 [001] d..2. 162573.291237: sched_switch: swapper/1:0 [120] R ==> migration/1:23 [0] Latency: 94.643 usecs trace-cmd-89141 [002] dNh6. 162573.346785: sched_wakeup: migration/2:28 [0] CPU:002 trace-cmd-89141 [002] d..2. 162573.346929: sched_switch: trace-cmd:89141 [120] R ==> migration/2:28 [0] Latency: 143.971 usecs trace-cmd-89134 [003] dNh4. 162573.410852: sched_wakeup: migration/3:33 [0] CPU:003 trace-cmd-89134 [003] d..2. 162573.411039: sched_switch: trace-cmd:89134 [120] R ==> migration/3:33 [0] Latency: 187.640 usecs -0 [004] dNh5. 162573.490944: sched_wakeup: migration/4:38 [0] CPU:004 -0 [004] d..2. 162573.491098: sched_switch: swapper/4:0 [120] R ==> migration/4:38 [0] Latency: 153.913 usecs -0 [005] dNh5. 162573.574955: sched_wakeup: migration/5:43 [0] CPU:005 -0 [005] d..2. 162573.575107: sched_switch: swapper/5:0 [120] R ==> migration/5:43 [0] Latency: 152.875 usecs -0 [006] dNh5. 162573.646878: sched_wakeup: migration/6:48 [0] CPU:006 -0 [006] d..2. 
162573.646992: sched_switch: swapper/6:0 [120] R ==> migration/6:48 [0] Latency: 113.788 usecs trace-cmd-89140 [002] dNh7. 162577.346818: sched_wakeup: migration/2:28 [0] CPU:002 trace-cmd-89140 [002] d..2. 162577.347038: sched_switch: trace-cmd:89140 [120] R ==> migration/2:28 [0] Latency: 220.583 usecs trace-cmd-89134 [003] dNh5. 162577.410869: sched_wakeup: migration/3:33 [0] CPU:003 trace-cmd-89141 [005] dNh6. 162577.574792: sched_wakeup: migration/5:43 [0] CPU:005 trace-cmd-89141 [005] d..2. 162577.574915: sched_switch: trace-cmd:89141 [120] R ==> migration/5:43 [0] Latency: 122.648 usecs trace-cmd-89136 [007] dNh6. 162577.719045: sched_wakeup: migration/7:53 [0] CPU:007 trace-cmd-89136 [007] d..2. 162577.719121: sched_switch: trace-cmd:89136 [120] R ==> migration/7:53 [0] Latency: 75.902 usecs trace-cmd-89140 [005] dNh4. 162581.574827: sched_wakeup: migration/5:43 [0] CPU:005 trace-cmd-89140 [005] d..2. 162581.574957: sched_switch: trace-cmd:89140 [120] R ==> migration/5:43 [0] Latency: 129.717 usecs kworker/u46:1-51211 [006] dNh4. 162581.646809: sched_wakeup: migration/6:48 [0] CPU:006 Average wakeup latency: 139.568 usecs Maximum Latency: 220.583 usecs at timestamp: 162577.347038 Minimum Latency: 75.902 usecs at timestamp: 162577.719121 RT task timings: Average wakeup latency: 139.568 usecs Maximum Latency: 220.583 usecs at timestamp: 162577.347038 Minimum Latency: 75.902 usecs at timestamp: 162577.719121 ---- The above version will only show the wakeups and context switches of Real Time tasks. The 'prio' used inside the kernel starts at 0 for highest priority. That is 'prio' 0 is equivalent to user space real time priority 99, and priority 98 is equivalent to user space real time priority 1. Prios less than 100 represent Real Time tasks. Notice that the total wake up timings are identical to the RT task timings. 
An example of the profile: [source,shell] ---- # trace-cmd record --profile sleep 1 # trace-cmd report --profile --comm sleep task: sleep-21611 Event: sched_switch:R (1) Total: 99442 Avg: 99442 Max: 99442 Min:99442 1 total:99442 min:99442 max:99442 avg=99442 => ftrace_raw_event_sched_switch (0xffffffff8105f812) => __schedule (0xffffffff8150810a) => preempt_schedule (0xffffffff8150842e) => ___preempt_schedule (0xffffffff81273354) => cpu_stop_queue_work (0xffffffff810b03c5) => stop_one_cpu (0xffffffff810b063b) => sched_exec (0xffffffff8106136d) => do_execve_common.isra.27 (0xffffffff81148c89) => do_execve (0xffffffff811490b0) => SyS_execve (0xffffffff811492c4) => return_to_handler (0xffffffff8150e3c8) => stub_execve (0xffffffff8150c699) Event: sched_switch:S (1) Total: 1000506680 Avg: 1000506680 Max: 1000506680 Min:1000506680 1 total:1000506680 min:1000506680 max:1000506680 avg=1000506680 => ftrace_raw_event_sched_switch (0xffffffff8105f812) => __schedule (0xffffffff8150810a) => schedule (0xffffffff815084b8) => do_nanosleep (0xffffffff8150b22c) => hrtimer_nanosleep (0xffffffff8108d647) => SyS_nanosleep (0xffffffff8108d72c) => return_to_handler (0xffffffff8150e3c8) => tracesys_phase2 (0xffffffff8150c304) Event: sched_wakeup:21611 (1) Total: 30326 Avg: 30326 Max: 30326 Min:30326 1 total:30326 min:30326 max:30326 avg=30326 => ftrace_raw_event_sched_wakeup_template (0xffffffff8105f653) => ttwu_do_wakeup (0xffffffff810606eb) => ttwu_do_activate.constprop.124 (0xffffffff810607c8) => try_to_wake_up (0xffffffff8106340a) ---- SEE ALSO -------- trace-cmd(1), trace-cmd-record(1), trace-cmd-start(1), trace-cmd-stop(1), trace-cmd-extract(1), trace-cmd-reset(1), trace-cmd-split(1), trace-cmd-list(1), trace-cmd-listen(1), trace-cmd-profile(1) AUTHOR ------ Written by Steven Rostedt, RESOURCES --------- https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ COPYING ------- Copyright \(C) 2010 Red Hat, Inc. 
Free use of this software is granted under the terms of the GNU Public License (GPL). trace-cmd-v3.3.1/Documentation/trace-cmd/trace-cmd-reset.1.txt000066400000000000000000000074401470231550600241020ustar00rootroot00000000000000TRACE-CMD-RESET(1) ================== NAME ---- trace-cmd-reset - turn off all Ftrace tracing to bring back full performance SYNOPSIS -------- *trace-cmd reset* ['OPTIONS'] DESCRIPTION ----------- The trace-cmd(1) reset command turns off all tracing of Ftrace. This will bring back the performance of the system before tracing was enabled. This is necessary since 'trace-cmd-record(1)', 'trace-cmd-stop(1)' and 'trace-cmd-extract(1)' do not disable the tracer, event after the data has been pulled from the buffers. The rational is that the user may want to manually enable the tracer with the Ftrace pseudo file system, or examine other parts of Ftrace to see what trace-cmd did. After the reset command happens, the data in the ring buffer, and the options that were used are all lost. OPTIONS ------- Please note that the order that options are specified on the command line is significant. See EXAMPLES. *-b* 'buffer_size':: When the kernel boots, the Ftrace ring buffer is of a minimal size (3 pages per CPU). The first time the tracer is used, the ring buffer size expands to what it was set for (default 1.4 Megs per CPU). If no more tracing is to be done, this option allows you to shrink the ring buffer down to free up available memory. trace-cmd reset -b 1 The buffer instance affected is the one (or ones) specified by the most recently preceding *-B*, *-t*, or *-a* option: When used after *-B*, resizes the buffer instance that precedes it on the command line. When used after *-a*, resizes all buffer instances except the top one. When used after *-t* or before any *-B* or *-a*, resizes the top instance. *-B* 'buffer-name':: If the kernel supports multiple buffers, this will reset the trace for only the given buffer. 
It does not affect any other buffer. This may be used multiple times to specify different buffers. The top level buffer will not be reset if this option is given (unless the *-t* option is also supplied). *-a*:: Reset the trace for all existing buffer instances. When this option is used, the top level instance will not be reset unless *-t* is given. *-d*:: This option deletes the instance buffer(s) specified by the most recently preceding *-B* or *-a* option. Because the top-level instance buffer cannot be deleted, it is invalid to use this immediately following *-t* or prior to any *-B* or *-a* option on the command line. *-t*:: Resets the top level instance buffer. Without the *-B* or *-a* option this is the same as the default. But if *-B* or *-a* is used, this is required if the top level instance buffer should also be reset. EXAMPLES -------- Reset tracing for instance-one and set its per-cpu buffer size to 4096kb. Also deletes instance-two. The top level instance and any other instances remain unaffected: trace-cmd reset -B instance-one -b 4096 -B instance-two -d Delete all instance buffers. Top level instance remains unaffected: trace-cmd reset -a -d Delete all instance buffers and also reset the top instance: trace-cmd reset -t -a -d Invalid. This command implies an attempt to delete the top instance: trace-cmd reset -a -t -d Reset the top instance and set its per-cpu buffer size to 1024kb. If any instance buffers exist, they will be unaffected: trace-cmd reset -b 1024 SEE ALSO -------- trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1), trace-cmd-stop(1), trace-cmd-extract(1), trace-cmd-split(1), trace-cmd-list(1), trace-cmd-listen(1) AUTHOR ------ Written by Steven Rostedt, <rostedt@goodmis.org> RESOURCES --------- https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ COPYING ------- Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under the terms of the GNU Public License (GPL).
trace-cmd-v3.3.1/Documentation/trace-cmd/trace-cmd-restore.1.txt000066400000000000000000000077411470231550600244470ustar00rootroot00000000000000TRACE-CMD-RESTORE(1) ==================== NAME ---- trace-cmd-restore - restore a failed trace record SYNOPSIS -------- *trace-cmd restore* ['OPTIONS'] ['command'] cpu-file [cpu-file ...] DESCRIPTION ----------- The trace-cmd(1) restore command will restore a crashed trace-cmd-record(1) file. If for some reason a trace-cmd record fails, it will leave a the per-cpu data files and not create the final trace.dat file. The trace-cmd restore will append the files to create a working trace.dat file that can be read with trace-cmd-report(1). When trace-cmd record runs, it spawns off a process per CPU and writes to a per cpu file usually called 'trace.dat.cpuX', where X represents the CPU number that it is tracing. If the -o option was used in the trace-cmd record, then the CPU data files will have that name instead of the 'trace.dat' name. If a unexpected crash occurs before the tracing is finished, then the per CPU files will still exist but there will not be any trace.dat file to read from. trace-cmd restore will allow you to create a trace.dat file with the existing data files. OPTIONS ------- *-c*:: Create a partial trace.dat file from the machine, to be used with a full trace-cmd restore at another time. This option is useful for embedded devices. If a server contains the cpu files of a crashed trace-cmd record (or trace-cmd listen), trace-cmd restore can be executed on the embedded device with the -c option to get all the stored information of that embedded device. Then the file created could be copied to the server to run the trace-cmd restore there with the cpu files. If *-o* is not specified, then the file created will be called 'trace-partial.dat'. This is because the file is not a full version of something that trace-cmd-report(1) could use. 
*-t* tracing_dir:: Used with *-c*, it overrides the location to read the events from. By default, tracing information is read from the debugfs/tracing directory. *-t* will use that location instead. This can be useful if the trace.dat file to create is from another machine. Just tar -cvf events.tar debugfs/tracing and copy and untar that file locally, and use that directory instead. *-k* kallsyms:: Used with *-c*, it overrides where to read the kallsyms file from. By default, /proc/kallsyms is used. *-k* will override the file to read the kallsyms from. This can be useful if the trace.dat file to create is from another machine. Just copy the /proc/kallsyms file locally, and use *-k* to point to that file. *-o* output:: By default, trace-cmd restore will create a 'trace.dat' file (or 'trace-partial.dat' if *-c* is specified). You can specify a different file to write to with the *-o* option. *-i* input:: By default, trace-cmd restore will read the information of the current system to create the initial data stored in the 'trace.dat' file. If the crash was on another machine, then that machine should have the trace-cmd restore run with the *-c* option to create the trace.dat partial file. Then that file can be copied to the current machine where trace-cmd restore will use *-i* to load that file instead of reading from the current system. EXAMPLES -------- If a crash happened on another box, you could run: $ trace-cmd restore -c -o box-partial.dat Then on the server that has the cpu files: $ trace-cmd restore -i box-partial.dat trace.dat.cpu0 trace.dat.cpu1 This would create a trace.dat file for the embedded box. 
SEE ALSO -------- trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1), trace-cmd-stop(1), trace-cmd-extract(1), trace-cmd-reset(1), trace-cmd-split(1), trace-cmd-list(1), trace-cmd-listen(1) AUTHOR ------ Written by Steven Rostedt, RESOURCES --------- https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ COPYING ------- Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under the terms of the GNU Public License (GPL). trace-cmd-v3.3.1/Documentation/trace-cmd/trace-cmd-set.1.txt000066400000000000000000000253211470231550600235510ustar00rootroot00000000000000TRACE-CMD-SET(1) ================ NAME ---- trace-cmd-set - set a configuration parameter of the Ftrace Linux internal tracer SYNOPSIS -------- *trace-cmd set* ['OPTIONS'] ['command'] DESCRIPTION ----------- The trace-cmd(1) set command will set a configuration parameter of the Ftrace Linux kernel tracer. The specified *command* will be run after the ftrace state is set. The configured ftrace state can be restored to default using the trace-cmd-reset(1) command. OPTIONS ------- *-p* 'tracer':: Specify a tracer. Tracers usually do more than just trace an event. Common tracers are: *function*, *function_graph*, *preemptirqsoff*, *irqsoff*, *preemptoff* and *wakeup*. A tracer must be supported by the running kernel. To see a list of available tracers, see trace-cmd-list(1). *-e* 'event':: Specify an event to trace. Various static trace points have been added to the Linux kernel. They are grouped by subsystem where you can enable all events of a given subsystem or specify specific events to be enabled. The 'event' is of the format "subsystem:event-name". You can also just specify the subsystem without the ':event-name' or the event-name without the "subsystem:". Using "-e sched_switch" will enable the "sched_switch" event where as, "-e sched" will enable all events under the "sched" subsystem. The 'event' can also contain glob expressions. 
That is, "*stat*" will select all events (or subsystems) that have the characters "stat" in their names. The keyword 'all' can be used to enable all events. *-T*:: Enable a stacktrace on each event. For example: -0 [003] 58549.289091: sched_switch: kworker/0:1:0 [120] R ==> trace-cmd:2603 [120] -0 [003] 58549.289092: kernel_stack: => schedule (ffffffff814b260e) => cpu_idle (ffffffff8100a38c) => start_secondary (ffffffff814ab828) *--func-stack*:: Enable a stack trace on all functions. Note this is only applicable for the "function" plugin tracer, and will only take effect if the -l option is used and succeeds in limiting functions. If the function tracer is not filtered, and the stack trace is enabled, you can live lock the machine. *-f* 'filter':: Specify a filter for the previous event. This must come after a *-e*. This will filter what events get recorded based on the content of the event. Filtering is passed to the kernel directly so what filtering is allowed may depend on what version of the kernel you have. Basically, it will let you use C notation to check if an event should be processed or not. [source,bison] ---- ==, >=, <=, >, <, &, |, && and || ---- The above are usually safe to use to compare fields. *-R* 'trigger':: Specify a trigger for the previous event. This must come after a *-e*. This will add a given trigger to the given event. To only enable the trigger and not the event itself, then place the event after the *-v* option. See Documentation/trace/events.txt in the Linux kernel source for more information on triggers. *-v*:: This will negate options specified after it on the command line. It affects: [verse] -- *-e*: Causes all specified events to not be traced. This is useful for selecting a subsystem to be traced but to leave out various events. For example: "-e sched -v -e "\*stat\*"" will enable all events in the sched subsystem except those that have "stat" in their names. *-B*: Deletes the specified ftrace instance. 
There must be no configuration options related to this instance in the command line. For example: "-v -B bar -B foo" will delete instance bar and create a new instance foo. Note: the *-v* option was taken from the way grep(1) inverts the following matches. -- *-P* 'pid':: This will filter only the specified process IDs. Using *-P* will let you trace only events that are caused by the process. *-c*:: Used with *-P* to trace the process' children too (if kernel supports it). *--user*:: Execute the specified *command* as the given user. *-C* 'clock':: Set the trace clock to "clock". Use trace-cmd(1) list -C to see what clocks are available. *-l* 'function-name':: This will limit the 'function' and 'function_graph' tracers to only trace the given function name. More than one *-l* may be specified on the command line to trace more than one function. The limited use of glob expressions is also allowed. These are 'match\*' to only filter functions that start with 'match'. '\*match' to only filter functions that end with 'match'. '\*match\*' to only filter on functions that contain 'match'. *-g* 'function-name':: This option is for the function_graph plugin. It will graph the given function. That is, it will only trace the function and all functions that it calls. You can have more than one *-g* on the command line. *-n* 'function-name':: This has the opposite effect of *-l*. The function given with the *-n* option will not be traced. This takes precedence, that is, if you include the same function for both *-n* and *-l*, it will not be traced. *-d*:: Some tracer plugins enable the function tracer by default. Like the latency tracers. This option prevents the function tracer from being enabled at start up. *-D*:: The option *-d* will try to use the function-trace option to disable the function tracer (if available), otherwise it defaults to the proc file: /proc/sys/kernel/ftrace_enabled, but will not touch it if the function-trace option is available. 
The *-D* option will disable both the ftrace_enabled proc file as well as the function-trace option if it exists. Note, this disables function tracing for all users, which includes users outside of ftrace tracers (stack_tracer, perf, etc). *-O* 'option':: Ftrace has various options that can be enabled or disabled. This allows you to set them. Appending the text 'no' to an option disables it. For example: "-O nograph-time" will disable the "graph-time" Ftrace option. *-b* 'size':: This sets the ring buffer size to 'size' kilobytes. Because the Ftrace ring buffer is per CPU, this size is the size of each per CPU ring buffer inside the kernel. Using "-b 10000" on a machine with 4 CPUs will make Ftrace have a total buffer size of 40 Megs. *-B* 'buffer-name':: If the kernel supports multiple buffers, this will add a buffer with the given name. If the buffer name already exists, that buffer is just reset. After a buffer name is stated, all events added after that will be associated with that buffer. If no buffer is specified, or an event is specified before a buffer name, it will be associated with the main (toplevel) buffer. trace-cmd set -e sched -B block -e block -B time -e timer sleep 1 The above will enable all sched events in the main buffer. It will then create a 'block' buffer instance and enable all block events within that buffer. A 'time' buffer instance is created and all timer events will be enabled for that buffer. *-m* 'size':: The max size in kilobytes that a per cpu buffer should be. Note, due to rounding to page size, the number may not be totally correct. Also, this is performed by switching between two buffers that are half the given size thus the output may not be of the given size even if much more was written. Use this to prevent running out of disk space for long runs. *-M* 'cpumask':: Set the cpumask to trace. It only affects the last buffer instance given. If supplied before any buffer instance, then it affects the main buffer. 
The value supplied must be a hex number. trace-cmd set -p function -M c -B events13 -e all -M 5 If the -M is left out, then the mask stays the same. To enable all CPUs, pass in a value of '-1'. *-i*:: By default, if an event is listed that trace-cmd does not find, it will exit with an error. This option will just ignore events that are listed on the command line but are not found on the system. *-q* | *--quiet*:: Suppresses normal output, except for errors. *--max-graph-depth* 'depth':: Set the maximum depth the function_graph tracer will trace into a function. A value of one will only show where userspace enters the kernel but not any functions called in the kernel. The default is zero, which means no limit. *--cmdlines-size* 'size':: Set the number of entries the kernel tracing file "saved_cmdlines" can contain. This file is a circular buffer which stores the mapping between cmdlines and PIDs. If full, it leads to unresolved cmdlines ("<...>") within the trace. The kernel default value is 128. *--module* 'module':: Filter a module's name in function tracing. It is equivalent to adding ':mod:module' after all other functions being filtered. If no other function filter is listed, then all modules functions will be filtered in the filter. '--module snd' is equivalent to '-l :mod:snd' '--module snd -l "*jack*"' is equivalent to '-l "*jack*:mod:snd"' '--module snd -n "*"' is equivalent to '-n :mod:snd' *--stderr*:: Have output go to stderr instead of stdout, but the output of the command executed will not be changed. This is useful if you want to monitor the output of the command being executed, but not see the output from trace-cmd. *--fork*:: If a command is listed, then trace-cmd will wait for that command to finish, unless the *--fork* option is specified. Then it will fork the command and return immediately. *--verbose*[='level']:: Set the log level. 
Supported log levels are "none", "critical", "error", "warning", "info", "debug", "all" or their identifiers "0", "1", "2", "3", "4", "5", "6". Setting the log level to specific value enables all logs from that and all previous levels. The level will default to "info" if one is not specified. Example: enable all critical, error and warning logs trace-cmd set --verbose=warning EXAMPLES -------- Enable all events for tracing: [source,shell] ---- # trace-cmd set -e all ---- Set the function tracer: [source,shell] ---- # trace-cmd set -p function ---- SEE ALSO -------- trace-cmd(1), trace-cmd-report(1), trace-cmd-start(1), trace-cmd-stop(1), trace-cmd-extract(1), trace-cmd-reset(1), trace-cmd-split(1), trace-cmd-list(1), trace-cmd-listen(1), trace-cmd-profile(1) AUTHOR ------ Written by Tzvetomir Stoyanov (VMware) RESOURCES --------- https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ COPYING ------- Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under the terms of the GNU Public License (GPL). trace-cmd-v3.3.1/Documentation/trace-cmd/trace-cmd-show.1.txt000066400000000000000000000067631470231550600237470ustar00rootroot00000000000000TRACE-CMD-SHOW(1) ================= NAME ---- trace-cmd-show - show the contents of the Ftrace Linux kernel tracing buffer. SYNOPSIS -------- *trace-cmd show* ['OPTIONS'] DESCRIPTION ----------- The trace-cmd(1) show displays the contents of one of the Ftrace Linux kernel tracing files: trace, snapshot, or trace_pipe. It is basically the equivalent of doing: cat /sys/kernel/debug/tracing/trace OPTIONS ------- *-p*:: Instead of displaying the contents of the "trace" file, use the "trace_pipe" file. The difference between the two is that the "trace" file is static. That is, if tracing is stopped, the "trace" file will show the same contents each time. 
The "trace_pipe" file is a consuming read, where a read of the file will consume the output of what was read and it will not read the same thing a second time even if tracing is stopped. This file will also block. If no data is available, trace-cmd show will stop and wait for data to appear. *-s*:: Instead of reading the "trace" file, read the snapshot file. The snapshot is made by an application writing into it and the kernel will perform a swap between the currently active buffer and the current snapshot buffer. If no more swaps are made, the snapshot will remain static. This is not a consuming read. *-c* 'cpu':: Read only the trace file for a specified CPU. *-f*:: Display the full path name of the file that is being displayed. *-B* 'buf':: If a buffer instance was created, then the *-B* option will access the files associated with the given buffer. *--tracing_on*:: Show if tracing is on for the given instance. *--current_tracer*:: Show what the current tracer is. *--buffer_size*:: Show the current buffer size (per-cpu) *--buffer_total_size*:: Show the total size of all buffers. *--buffer_subbuf_size*:: Show the size in kilobytes of the sub-buffers of the ring buffer. The ring buffer is broken up into equal size sub-buffers where an event can only be as big as the sub-buffer data section (the size minus its meta data). *--buffer_percent*:: Show the percentage the buffer must be filled before a reader that is blocked on the trace_pipe_raw file will be woken up. 0 : wake up immediately on any new data 1 - 99 : wake up on this percentage of the sub-buffers being full 100 : wake up after the buffer is full and the writer is on the last sub-buffer *--ftrace_filter*:: Show what function filters are set. *--ftrace_notrace*:: Show what function disabled filters are set. *--ftrace_pid*:: Show the PIDs the function tracer is limited to (if any). *--graph_function*:: Show the functions that will be graphed. *--graph_notrace*:: Show the functions that will not be graphed. 
*--hist* '[system:]event':: Show the content of a histogram "hist" file for a given event *--trigger* '[system:]event':: Show the content of the "trigger" file for a given event *--cpumask*:: Show the mask of CPUs that tracing will trace. SEE ALSO -------- trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1), trace-cmd-extract(1), trace-cmd-reset(1), trace-cmd-split(1), trace-cmd-list(1), trace-cmd-listen(1) AUTHOR ------ Written by Steven Rostedt, RESOURCES --------- https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ COPYING ------- Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under the terms of the GNU Public License (GPL). trace-cmd-v3.3.1/Documentation/trace-cmd/trace-cmd-snapshot.1.txt000066400000000000000000000032111470231550600246070ustar00rootroot00000000000000TRACE-CMD-SNAPSHOT(1) ===================== NAME ---- trace-cmd-snapshot - take, reset, free, or show a Ftrace kernel snapshot SYNOPSIS -------- *trace-cmd snapshot* ['OPTIONS'] DESCRIPTION ----------- The trace-cmd(1) snapshot controls or displays the Ftrace Linux kernel snapshot feature (if the kernel supports it). This is useful to "freeze" an instance of a live trace but without stopping the trace. trace-cmd start -p function trace-cmd snapshot -s trace-cmd snapshot [ dumps the content of buffer at 'trace-cmd snapshot -s' ] trace-cmd snapshot -s trace-cmd snapshot [ dumps the new content of the buffer at the last -s operation ] OPTIONS ------- *-s*:: Take a snapshot of the currently running buffer. *-r*:: Clear out the buffer. *-f*:: Free the snapshot buffer. The buffer takes up memory inside the kernel. It is best to free it when not in use. The first -s operation will allocate it if it is not already allocated. *-c* 'cpu':: Operate on a per cpu snapshot (may not be fully supported by all kernels) *-B* 'buf':: If a buffer instance was created, then the *-B* option will operate on the snapshot within the buffer. 
SEE ALSO -------- trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1), trace-cmd-extract(1), trace-cmd-reset(1), trace-cmd-split(1), trace-cmd-list(1), trace-cmd-listen(1) AUTHOR ------ Written by Steven Rostedt, RESOURCES --------- https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ COPYING ------- Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under the terms of the GNU Public License (GPL). trace-cmd-v3.3.1/Documentation/trace-cmd/trace-cmd-split.1.txt000066400000000000000000000075101470231550600241110ustar00rootroot00000000000000TRACE-CMD-SPLIT(1) ================== NAME ---- trace-cmd-split - split a trace.dat file into smaller files SYNOPSIS -------- *trace-cmd split* ['OPTIONS'] ['start-time' ['end-time']] DESCRIPTION ----------- The trace-cmd(1) split is used to break up a trace.dat into small files. The 'start-time' specifies where the new file will start at. Using 'trace-cmd-report(1)' and copying the time stamp given at a particular event, can be used as input for either 'start-time' or 'end-time'. The split will stop creating files when it reaches an event after 'end-time'. If only the end-time is needed, use 0.0 as the start-time. If start-time is left out, then the split will start at the beginning of the file. If end-time is left out, then split will continue to the end unless it meets one of the requirements specified by the options. OPTIONS ------- *-i* 'file':: If this option is not specified, then the split command will look for the file named 'trace.dat'. This options will allow the reading of another file other than 'trace.dat'. *-o* 'file':: By default, the split command will use the input file name as a basis of where to write the split files. The output file will be the input file with an attached \'.#\' to the end: trace.dat.1, trace.dat.2, etc. This option will change the name of the base file used. -o file will create file.1, file.2, etc. 
*-s* 'seconds':: This specifies how many seconds should be recorded before the new file should stop. *-m* 'milliseconds':: This specifies how many milliseconds should be recorded before the new file should stop. *-u* 'microseconds':: This specifies how many microseconds should be recorded before the new file should stop. *-e* 'events':: This specifies how many events should be recorded before the new file should stop. *-p* 'pages':: This specifies the number of pages that should be recorded before the new file should stop. Note: only one of *-p*, *-e*, *-u*, *-m*, *-s* may be specified at a time. If *-p* is specified, then *-c* is automatically set. *-r*:: This option causes the break up to repeat until end-time is reached (or end of the input if end-time is not specified). trace-cmd split -r -e 10000 This will break up trace.dat into several smaller files, each with at most 10,000 events in it. *-c*:: This option causes the above break up to be per CPU. trace-cmd split -c -p 10 This will create a file that has 10 pages per each CPU from the input. *-C* 'cpu':: This option will split for a single CPU. Only the cpu named will be extracted from the file. trace-cmd split -C 1 This will split out all the events for cpu 1 in the file. *--top*:: This allows to keep the top buffer. The top buffer can be renamed using the '-b' option. trace-cmd split --top This will keep only the top buffer. trace-cmd split --top -b old_top This will keep only the top buffer and rename it 'old_top'. *-B* 'buffer':: This allows to keep the selected buffer. A buffer can be promoted to the top buffer using the '-t' option. trace-cmd split -B timer -B sched This will keep the 'timer' and 'sched' buffers. trace-cmd split -B timer -t -B sched This will keep the 'timer' and 'sched' buffers, with the events from the 'timer' buffer promoted to the top instance. 
SEE ALSO -------- trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1), trace-cmd-stop(1), trace-cmd-extract(1), trace-cmd-reset(1), trace-cmd-list(1), trace-cmd-listen(1) AUTHOR ------ Written by Steven Rostedt, RESOURCES --------- https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ COPYING ------- Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under the terms of the GNU Public License (GPL). trace-cmd-v3.3.1/Documentation/trace-cmd/trace-cmd-sqlhist.1.txt000066400000000000000000000432301470231550600244440ustar00rootroot00000000000000TRACE-CMD-SQLHIST(1) ==================== NAME ---- trace-cmd-sqlhist - Use SQL language to create / show creation of tracefs histograms and synthetic events SYNOPSIS -------- *trace-cmd sqlhist* ['OPTIONS'] ['SQL-select-command'] DESCRIPTION ----------- The trace-cmd sqlhist(1) will take an SQL like statement to create tracefs histograms and synthetic events that can perform various actions for various handling of the data. The tracefs file system interfaces with the Linux tracing infrastructure that has various dynamic and static events throughout the kernel. Each of these events can have a "histogram" attached to it, where the fields of the event will define the buckets of the histogram. A synthetic event is a way to attach two separate events and use the fields and time stamps of those events to create a new dynamic event. This new dynamic event is called a synthetic event. The fields of each event can have simple calculations done on them where, for example, the delta between a field of one event to a field of the other event can be taken. This also works for the time stamps of the events where the time delta between the two events can also be extracted and placed into the synthetic event. Other actions can be done from the fields of the events. A snapshot can be taken of the kernel ring buffer when a variable used in the synthetic event creation hits a max, or simply changes. 
The commands to create histograms and synthetic events are complex and not easy to remember. *trace-cmd sqlhist* is used to convert SQL syntax into the commands needed to create the histogram or synthetic event. The *SQL-select-command* is a SQL string defined by *tracefs_sqlhist*(3). Note, this must be run as root (or sudo) as interacting with the tracefs directory requires root privilege, unless the *-t* option is given with a copy of the _tracefs_ directory and its events. OPTIONS ------- *-n* 'name':: The name of the synthetic event to create. This event can then be used like any other event, and enabled via *trace-cmd record*(1). *-t* 'tracefs-dir':: In order to test this out as non root user, a copy of the tracefs directory can be used, and passing that directory with this option will allow the program to work. Obviously, *-e* will not work as non-root because it will not be able to execute. # mkdir /tmp/tracing # cp -r /sys/kernel/tracing/events /tmp/tracing # exit $ trace-cmd sqlhist -t /tmp/tracing ... *-e*:: Not only display the commands to create the histogram, but also execute them. This requires root privilege. *-f* 'file':: Instead of reading the SQL commands from the command line, read them from _file_. If _file_ is '-' then read from standard input. *-m* 'var':: Do the given action when the variable _var_ hits a new maximum. This can not be used with *-c*. The _var_ must be defined in the *SQL-select-command*. *-c* 'var':: Do the given action when the variable _var_ changes its value. This can not be used with *-m*. The _var_ must be defined in the *SQL-select-command*. *-s*:: Perform a snapshot instead of calling the synthetic event. *-T*:: Perform both a snapshot and trace the synthetic event. *-S* 'fields[,fields]':: Save the given fields. 
The fields must be fields of the "end" event given in the *SQL-select-command*. *-B* 'instance':: For simple statements that only produce a histogram, the instance given here will be where the histogram will be created. This is ignored for full synthetic event creation, as synthetic events have a global effect on all tracing instances, whereas histograms only affect a single instance. EXAMPLES -------- As described above, for testing purposes, make a copy of the event directory: [source, c] -- $ mkdir /tmp/tracing $ sudo cp -r /sys/kernel/tracing/events /tmp/tracing/ $ sudo chmod -R 0644 /tmp/tracing/ -- For an example of simple histogram output using the copy of the tracefs directory. [source, c] -- $ trace-cmd sqlhist -t /tmp/tracing/ 'SELECT CAST(call_site as SYM-OFFSET), bytes_req, CAST(bytes_alloc AS _COUNTER_) FROM kmalloc' -- Produces the output: [source, c] -- echo 'hist:keys=call_site.sym-offset,bytes_req:vals=bytes_alloc' > /sys/kernel/tracing/events/kmem/kmalloc/trigger -- Which could be used by root: [source, c] -- # echo 'hist:keys=call_site.sym-offset,bytes_req:vals=bytes_alloc' > /sys/kernel/tracing/events/kmem/kmalloc/trigger # cat /sys/kernel/tracing/events/kmem/kmalloc/hist # event histogram # # trigger info: hist:keys=call_site.sym-offset,bytes_req:vals=hitcount,bytes_alloc:sort=hitcount:size=2048 [active] # { call_site: [ffffffff813f8d8a] load_elf_phdrs+0x4a/0xb0 , bytes_req: 728 } hitcount: 1 bytes_alloc: 1024 { call_site: [ffffffffc0c69e74] nf_ct_ext_add+0xd4/0x1d0 [nf_conntrack] , bytes_req: 128 } hitcount: 1 bytes_alloc: 128 { call_site: [ffffffff818355e6] dma_resv_get_fences+0xf6/0x440 , bytes_req: 8 } hitcount: 1 bytes_alloc: 8 { call_site: [ffffffffc06dc73f] intel_gt_get_buffer_pool+0x15f/0x290 [i915] , bytes_req: 424 } hitcount: 1 bytes_alloc: 512 { call_site: [ffffffff813f8d8a] load_elf_phdrs+0x4a/0xb0 , bytes_req: 616 } hitcount: 1 bytes_alloc: 1024 { call_site: [ffffffff8161a44c] __sg_alloc_table+0x11c/0x180 , bytes_req: 32 } 
hitcount: 1 bytes_alloc: 32 { call_site: [ffffffffc070749d] shmem_get_pages+0xad/0x5d0 [i915] , bytes_req: 16 } hitcount: 1 bytes_alloc: 16 { call_site: [ffffffffc07507f5] intel_framebuffer_create+0x25/0x60 [i915] , bytes_req: 408 } hitcount: 1 bytes_alloc: 512 { call_site: [ffffffffc06fc20f] eb_parse+0x34f/0x910 [i915] , bytes_req: 408 } hitcount: 1 bytes_alloc: 512 { call_site: [ffffffffc0700ebd] i915_gem_object_get_pages_internal+0x5d/0x270 [i915] , bytes_req: 16 } hitcount: 1 bytes_alloc: 16 { call_site: [ffffffffc0771188] intel_frontbuffer_get+0x38/0x220 [i915] , bytes_req: 400 } hitcount: 1 bytes_alloc: 512 { call_site: [ffffffff8161a44c] __sg_alloc_table+0x11c/0x180 , bytes_req: 128 } hitcount: 1 bytes_alloc: 128 { call_site: [ffffffff813f8f45] load_elf_binary+0x155/0x1680 , bytes_req: 28 } hitcount: 1 bytes_alloc: 32 { call_site: [ffffffffc07038c8] __assign_mmap_offset+0x208/0x3d0 [i915] , bytes_req: 288 } hitcount: 1 bytes_alloc: 512 { call_site: [ffffffff813737b2] alloc_bprm+0x32/0x2f0 , bytes_req: 416 } hitcount: 1 bytes_alloc: 512 { call_site: [ffffffff813f9027] load_elf_binary+0x237/0x1680 , bytes_req: 64 } hitcount: 1 bytes_alloc: 64 { call_site: [ffffffff8161a44c] __sg_alloc_table+0x11c/0x180 , bytes_req: 64 } hitcount: 1 bytes_alloc: 64 { call_site: [ffffffffc040ffe7] drm_vma_node_allow+0x27/0xe0 [drm] , bytes_req: 40 } hitcount: 2 bytes_alloc: 128 { call_site: [ffffffff813cda98] __do_sys_timerfd_create+0x58/0x1c0 , bytes_req: 336 } hitcount: 2 bytes_alloc: 1024 { call_site: [ffffffff818355e6] dma_resv_get_fences+0xf6/0x440 , bytes_req: 40 } hitcount: 2 bytes_alloc: 128 { call_site: [ffffffff8139b75a] single_open+0x2a/0xa0 , bytes_req: 32 } hitcount: 2 bytes_alloc: 64 { call_site: [ffffffff815df715] bio_kmalloc+0x25/0x80 , bytes_req: 136 } hitcount: 2 bytes_alloc: 384 { call_site: [ffffffffc071e5cd] i915_vma_work+0x1d/0x50 [i915] , bytes_req: 416 } hitcount: 3 bytes_alloc: 1536 { call_site: [ffffffff81390d0d] alloc_fdtable+0x4d/0x100 , bytes_req: 56 
} hitcount: 3 bytes_alloc: 192 { call_site: [ffffffffc06ff65f] i915_gem_do_execbuffer+0x158f/0x2440 [i915] , bytes_req: 16 } hitcount: 4 bytes_alloc: 64 { call_site: [ffffffff8137713c] alloc_pipe_info+0x5c/0x230 , bytes_req: 384 } hitcount: 5 bytes_alloc: 2560 { call_site: [ffffffff813771b4] alloc_pipe_info+0xd4/0x230 , bytes_req: 640 } hitcount: 5 bytes_alloc: 5120 { call_site: [ffffffff81834cdb] dma_resv_list_alloc+0x1b/0x40 , bytes_req: 40 } hitcount: 6 bytes_alloc: 384 { call_site: [ffffffff81834cdb] dma_resv_list_alloc+0x1b/0x40 , bytes_req: 56 } hitcount: 9 bytes_alloc: 576 { call_site: [ffffffff8120086e] tracing_map_sort_entries+0x9e/0x3e0 , bytes_req: 24 } hitcount: 60 bytes_alloc: 1920 Totals: Hits: 122 Entries: 30 Dropped: 0 -- Note, although the examples use uppercase for the SQL keywords, they do not have to be. 'SELECT' could also be 'select' or even 'sElEcT'. By using the full SQL language, synthetic events can be made and processed. For example, using *trace-cmd sqlhist* along with *trace-cmd record*(1), wake up latency can be recorded by creating a synthetic event by attaching the _sched_waking_ and the _sched_switch_ events. 
[source, c] -- # trace-cmd sqlhist -n wakeup_lat -e -T -m lat 'SELECT end.next_comm AS comm, (end.TIMESTAMP_USECS - start.TIMESTAMP_USECS) AS lat FROM ' \ 'sched_waking AS start JOIN sched_switch AS end ON start.pid = end.next_pid WHERE end.next_prio < 100 && end.next_comm == "cyclictest"' # trace-cmd start -e all -e wakeup_lat -R stacktrace # cyclictest -l 1000 -p80 -i250 -a -t -q -m -d 0 -b 1000 --tracemark # trace-cmd show -s | tail -30 -0 [002] dNh4 23454.902246: sched_wakeup: comm=cyclictest pid=12272 prio=120 target_cpu=002 -0 [005] ...1 23454.902246: cpu_idle: state=4294967295 cpu_id=5 -0 [007] d..1 23454.902246: cpu_idle: state=0 cpu_id=7 -0 [002] dNh1 23454.902247: hrtimer_expire_exit: hrtimer=0000000037956dc2 -0 [005] d..1 23454.902248: cpu_idle: state=0 cpu_id=5 -0 [002] dNh1 23454.902248: write_msr: 6e0, value 4866ce957272 -0 [006] ...1 23454.902248: cpu_idle: state=4294967295 cpu_id=6 -0 [002] dNh1 23454.902249: local_timer_exit: vector=236 -0 [006] d..1 23454.902250: cpu_idle: state=0 cpu_id=6 -0 [002] .N.1 23454.902250: cpu_idle: state=4294967295 cpu_id=2 -0 [002] dN.1 23454.902251: rcu_utilization: Start context switch -0 [002] dN.1 23454.902252: rcu_utilization: End context switch -0 [001] ...1 23454.902252: cpu_idle: state=4294967295 cpu_id=1 -0 [002] dN.3 23454.902253: prandom_u32: ret=3692516021 -0 [001] d..1 23454.902254: cpu_idle: state=0 cpu_id=1 -0 [002] d..2 23454.902254: sched_switch: prev_comm=swapper/2 prev_pid=0 prev_prio=120 prev_state=R ==> next_comm=cyclictest next_pid=12275 next_prio=19 -0 [002] d..4 23454.902256: wakeup_lat: next_comm=cyclictest lat=17 -0 [002] d..5 23454.902258: => trace_event_raw_event_synth => action_trace => event_hist_trigger => event_triggers_call => trace_event_buffer_commit => trace_event_raw_event_sched_switch => __traceiter_sched_switch => __schedule => schedule_idle => do_idle => cpu_startup_entry => secondary_startup_64_no_verify -- Here's the options for above example explained: *-n wakeup_lat* :: Name 
the synthetic event to use *wakeup_lat*. *-e*:: Execute the commands that are printed. *-T*:: Perform both a trace action and then a snapshot action (swap the buffer into the static 'snapshot' buffer). *-m lat*:: Trigger the actions whenever 'lat' hits a new maximum value. Now a breakdown of the SQL statement: [source, c] -- 'SELECT end.next_comm AS comm, (end.TIMESTAMP_USECS - start.TIMESTAMP_USECS) AS lat FROM ' \ 'sched_waking AS start JOIN sched_switch AS end ON start.pid = end.next_pid WHERE end.next_prio < 100 && end.next_comm == "cyclictest"' -- *end.next_comm AS comm*:: Save the 'sched_switch' field *next_comm* and place it into the *comm* field of the 'wakeup_lat' synthetic event. *(end.TIMESTAMP_USECS - start.TIMESTAMP_USECS) AS lat*:: Take the delta of the time stamps from the 'sched_switch' event and the 'sched_waking' event. As time stamps are usually recorded in nanoseconds, *TIMESTAMP* would give the full nanosecond time stamp, but here, the *TIMESTAMP_USECS* will truncate it into microseconds. The value is saved in the variable *lat*, which will also be recorded in the synthetic event. *FROM 'sched_waking' AS start JOIN sched_switch AS end ON start.pid = end.next_pid*:: Create the synthetic event by joining _sched_waking_ to _sched_switch_, matching the _sched_waking_ 'pid' field with the _sched_switch_ 'next_pid' field. Also make *start* an alias for _sched_waking_ and *end* an alias for _sched_switch_, so that *start* and *end* can then be used as a substitute for _sched_waking_ and _sched_switch_ respectively throughout the rest of the SQL statement. *WHERE end.next_prio < 100 && end.next_comm == "cyclictest"*:: Filter the logic where it executes only if the _sched_switch_ 'next_prio' field is less than 100. (Note, in the Kernel, priorities are inverse, and the real-time priorities are represented from 0-100 where 0 is the highest priority). Also only trace when the 'next_comm' (the task scheduling in) of the _sched_switch_ event has the name "cyclictest".
For the *trace-cmd*(3) command: [source, c] -- trace-cmd start -e all -e wakeup_lat -R stacktrace -- *trace-cmd start*:: Enables tracing (does not record to a file). *-e all*:: Enable all events *-e wakeup_lat -R stacktrace*:: have the "wakeup_lat" event (our synthetic event) enable the *stacktrace* trigger, where, for every instance of the "wakeup_lat" event, a kernel stack trace will be recorded in the ring buffer. After calling *cyclictest* (a real-time tool to measure wakeup latency), read the snapshot buffer. *trace-cmd show -s*:: *trace-cmd show* reads the kernel ring buffer, and the *-s* option will read the *snapshot* buffer instead of the normal one. [source, c] -- -0 [002] d..4 23454.902256: wakeup_lat: next_comm=cyclictest lat=17 -- We see that the "wakeup_lat" event happened on CPU 2, with a wake up latency of 17 microseconds. This can be extracted into a *trace.dat* file that *trace-cmd*(3) can read and do further analysis, as well as *kernelshark*. [source, c] -- # trace-cmd extract -s # trace-cmd report --cpu 2 | tail -30 -0 [002] 23454.902238: prandom_u32: ret=1633425088 -0 [002] 23454.902239: sched_wakeup: cyclictest:12275 [19] CPU:002 -0 [002] 23454.902241: hrtimer_expire_exit: hrtimer=0xffffbbd68286fe60 -0 [002] 23454.902241: hrtimer_cancel: hrtimer=0xffffbbd6826efe70 -0 [002] 23454.902242: hrtimer_expire_entry: hrtimer=0xffffbbd6826efe70 now=23455294430750 function=hrtimer_wakeup/0x0 -0 [002] 23454.902243: sched_waking: comm=cyclictest pid=12272 prio=120 target_cpu=002 -0 [002] 23454.902244: prandom_u32: ret=1102749734 -0 [002] 23454.902246: sched_wakeup: cyclictest:12272 [120] CPU:002 -0 [002] 23454.902247: hrtimer_expire_exit: hrtimer=0xffffbbd6826efe70 -0 [002] 23454.902248: write_msr: 6e0, value 4866ce957272 -0 [002] 23454.902249: local_timer_exit: vector=236 -0 [002] 23454.902250: cpu_idle: state=4294967295 cpu_id=2 -0 [002] 23454.902251: rcu_utilization: Start context switch -0 [002] 23454.902252: rcu_utilization: End context switch -0 [002]
23454.902253: prandom_u32: ret=3692516021 -0 [002] 23454.902254: sched_switch: swapper/2:0 [120] R ==> cyclictest:12275 [19] -0 [002] 23454.902256: wakeup_lat: next_comm=cyclictest lat=17 -0 [002] 23454.902258: kernel_stack: => trace_event_raw_event_synth (ffffffff8121a0db) => action_trace (ffffffff8121e9fb) => event_hist_trigger (ffffffff8121ca8d) => event_triggers_call (ffffffff81216c72) => trace_event_buffer_commit (ffffffff811f7618) => trace_event_raw_event_sched_switch (ffffffff8110fda4) => __traceiter_sched_switch (ffffffff8110d449) => __schedule (ffffffff81c02002) => schedule_idle (ffffffff81c02c86) => do_idle (ffffffff8111e898) => cpu_startup_entry (ffffffff8111eba9) => secondary_startup_64_no_verify (ffffffff81000107) -- SEE ALSO -------- trace-cmd(1), tracefs_sqlhist(3) AUTHOR ------ Written by Steven Rostedt, RESOURCES --------- https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ COPYING ------- Copyright \(C) 2021 , Inc. Free use of this software is granted under the terms of the GNU Public License (GPL). trace-cmd-v3.3.1/Documentation/trace-cmd/trace-cmd-stack.1.txt000066400000000000000000000030221470231550600240550ustar00rootroot00000000000000TRACE-CMD-STACK(1) ================== NAME ---- trace-cmd-stack - read, enable or disable Ftrace Linux kernel stack tracing. SYNOPSIS -------- *trace-cmd stack* DESCRIPTION ----------- The trace-cmd(1) stack enables the Ftrace stack tracer within the kernel. The stack tracer enables the function tracer and at each function call within the kernel, the stack is checked. When a new maximum usage stack is discovered, it is recorded. When no option is used, the current stack is displayed. To enable the stack tracer, use the option *--start*, and to disable the stack tracer, use the option *--stop*. The output will be the maximum stack found since the start was enabled. Use *--reset* to reset the stack counter to zero. Use *--verbose*[='level'] to set the log level.
Supported log levels are "none", "critical", "error", "warning", "info", "debug", "all" or their identifiers "0", "1", "2", "3", "4", "5", "6". Setting the log level to specific value enables all logs from that and all previous levels. The level will default to "info" if one is not specified. SEE ALSO -------- trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1), trace-cmd-extract(1), trace-cmd-reset(1), trace-cmd-split(1), trace-cmd-list(1), trace-cmd-listen(1) AUTHOR ------ Written by Steven Rostedt, RESOURCES --------- https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ COPYING ------- Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under the terms of the GNU Public License (GPL). trace-cmd-v3.3.1/Documentation/trace-cmd/trace-cmd-start.1.txt000066400000000000000000000030561470231550600241140ustar00rootroot00000000000000TRACE-CMD-START(1) ================== NAME ---- trace-cmd-start - start the Ftrace Linux kernel tracer without recording SYNOPSIS -------- *trace-cmd start* ['OPTIONS'] DESCRIPTION ----------- The trace-cmd(1) start enables all the Ftrace tracing the same way trace-cmd-record(1) does. The difference is that it does not run threads to create a trace.dat file. This is useful just to enable Ftrace and you are only interested in the trace after some event has occurred and the trace is stopped. Then the trace can be read straight from the Ftrace pseudo file system or can be extracted with trace-cmd-extract(1). OPTIONS ------- The options are the same as 'trace-cmd-record(1)', except that it does not take options specific to recording (*-s*, *-o*, *-N*, and *-t*). *--fork* :: This option is only available for trace-cmd start. It tells trace-cmd to not wait for the process to finish before returning. With this option, trace-cmd start will return right after it forks the process on the command line. This option only has an effect if trace-cmd start also executes a command. 
SEE ALSO -------- trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-stop(1), trace-cmd-extract(1), trace-cmd-reset(1), trace-cmd-split(1), trace-cmd-list(1), trace-cmd-listen(1) AUTHOR ------ Written by Steven Rostedt, RESOURCES --------- https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ COPYING ------- Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under the terms of the GNU Public License (GPL). trace-cmd-v3.3.1/Documentation/trace-cmd/trace-cmd-stat.1.txt000066400000000000000000000044011470231550600237250ustar00rootroot00000000000000TRACE-CMD-STAT(1) ================= NAME ---- trace-cmd-stat - show the status of the tracing (ftrace) system SYNOPSIS -------- *trace-cmd stat* ['OPTIONS'] DESCRIPTION ----------- The trace-cmd(1) stat displays the various status of the tracing (ftrace) system. The status that it shows is: *Instances:* List all configured ftrace instances. *Tracer:* if one of the tracers (like function_graph) is active. Otherwise nothing is displayed. *Events:* Lists the events that are enabled. *Event filters:* Shows any filters that are set for any events *Function filters:* Shows any filters for the function tracers *Graph functions:* Shows any functions that the function graph tracer should graph *Buffers:* Shows the trace buffer size if they have been expanded. By default, tracing buffers are in a compressed format until they are used. If they are compressed, the buffer display will not be shown. *Trace clock:* If the tracing clock is anything other than the default "local" it will be displayed. *Trace CPU mask:* If not all available CPUs are in the tracing CPU mask, then the tracing CPU mask will be displayed. *Trace max latency:* Shows the value of the trace max latency if it is other than zero. *Kprobes:* Shows any kprobes that are defined for tracing. *Uprobes:* Shows any uprobes that are defined for tracing. *Error log:* Dump the content of ftrace error_log file.
OPTIONS ------- *-B* 'buffer-name':: Display the status of a given buffer instance. May be specified more than once to display the status of multiple instances. *-t*:: If *-B* is also specified, show the status of the top level tracing directory as well as the instance(s). *-o*:: Display the all the options along with their values. If they start with "no", then the option is disabled. SEE ALSO -------- trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1), trace-cmd-stop(1), trace-cmd-extract(1), trace-cmd-reset(1), trace-cmd-split(1), trace-cmd-listen(1) AUTHOR ------ Written by Steven Rostedt, RESOURCES --------- https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ COPYING ------- Copyright \(C) 2014 Red Hat, Inc. Free use of this software is granted under the terms of the GNU Public License (GPL). trace-cmd-v3.3.1/Documentation/trace-cmd/trace-cmd-stop.1.txt000066400000000000000000000037631470231550600237510ustar00rootroot00000000000000TRACE-CMD-STOP(1) ================= NAME ---- trace-cmd-stop - stop the Ftrace Linux kernel tracer from writing to the ring buffer. SYNOPSIS -------- *trace-cmd stop* ['OPTIONS'] DESCRIPTION ----------- The trace-cmd(1) stop is a complement to 'trace-cmd-start(1)'. This will disable Ftrace from writing to the ring buffer. This does not stop the overhead that the tracing may incur. Only the updating of the ring buffer is disabled, the Ftrace tracing may still be inducing overhead. After stopping the trace, the 'trace-cmd-extract(1)' may strip out the data from the ring buffer and create a trace.dat file. The Ftrace pseudo file system may also be examined. To disable the tracing completely to remove the overhead it causes, use 'trace-cmd-reset(1)'. But after a reset is performed, the data that has been recorded is lost. OPTIONS ------- *-B* 'buffer-name':: If the kernel supports multiple buffers, this will stop the trace for only the given buffer. It does not affect any other buffer. 
This may be used multiple times to specify different buffers. When this option is used, the top level instance will not be stopped unless *-t* is given. *-a*:: Stop the trace for all existing buffer instances. When this option is used, the top level instance will not be stopped unless *-t* is given. *-t*:: Stops the top level instance buffer. Without the *-B* or *-a* option this is the same as the default. But if *-B* or *-a* is used, this is required if the top level instance buffer should also be stopped. SEE ALSO -------- trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1), trace-cmd-extract(1), trace-cmd-reset(1), trace-cmd-split(1), trace-cmd-list(1), trace-cmd-listen(1) AUTHOR ------ Written by Steven Rostedt, RESOURCES --------- https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ COPYING ------- Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under the terms of the GNU Public License (GPL). trace-cmd-v3.3.1/Documentation/trace-cmd/trace-cmd-stream.1.txt000066400000000000000000000026751470231550600242600ustar00rootroot00000000000000TRACE-CMD-STREAM(1) =================== NAME ---- trace-cmd-stream - stream a trace to stdout as it is happening SYNOPSIS -------- *trace-cmd stream ['OPTIONS']* ['command'] DESCRIPTION ----------- The trace-cmd(1) stream will start tracing just like trace-cmd-record(1), except it will not record to a file and instead it will read the binary buffer as it is happening, convert it to a human readable format and write it to stdout. This is basically the same as trace-cmd-start(1) and then doing a trace-cmd-show(1) with the *-p* option. trace-cmd-stream is not as efficient as reading from the pipe file as most of the stream work is done in userspace. This is useful if it is needed to do the work mostly in userspace instead of the kernel, and stream also helps to debug trace-cmd-profile(1) which uses the stream code to perform the live data analysis for the profile. 
OPTIONS ------- These are the same as trace-cmd-record(1), except that it does not take the *-o* option. SEE ALSO -------- trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1), trace-cmd-stop(1), trace-cmd-reset(1), trace-cmd-split(1), trace-cmd-list(1), trace-cmd-listen(1) AUTHOR ------ Written by Steven Rostedt, RESOURCES --------- https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ COPYING ------- Copyright \(C) 2014 Red Hat, Inc. Free use of this software is granted under the terms of the GNU Public License (GPL). trace-cmd-v3.3.1/Documentation/trace-cmd/trace-cmd.1.txt000066400000000000000000000061231470231550600227570ustar00rootroot00000000000000TRACE-CMD(1) ============ NAME ---- trace-cmd - interacts with Ftrace Linux kernel internal tracer SYNOPSIS -------- *trace-cmd* 'COMMAND' ['OPTIONS'] DESCRIPTION ----------- The trace-cmd(1) command interacts with the Ftrace tracer that is built inside the Linux kernel. It interfaces with the Ftrace specific files found in the debugfs file system under the tracing directory. A 'COMMAND' must be specified to tell trace-cmd what to do. COMMANDS -------- record - record a live trace and write a trace.dat file to the local disk or to the network. set - set a ftrace configuration parameter. report - reads a trace.dat file and converts the binary data to an ASCII text readable format. stream - Start tracing and read the output directly profile - Start profiling and read the output directly hist - show a histogram of the events. stat - show tracing (ftrace) status of the running system options - list the plugin options that are available to *report* start - start the tracing without recording to a trace.dat file. stop - stop tracing (only disables recording, overhead of tracer is still in effect) restart - restart tracing from a previous stop (only affects recording) extract - extract the data from the kernel buffer and create a trace.dat file.
show - display the contents of one of the Ftrace Linux kernel tracing files reset - disables all tracing and gives back the system performance. (clears all data from the kernel buffers) clear - clear the content of the Ftrace ring buffers. split - splits a trace.dat file into smaller files. list - list the available plugins or events that can be recorded. listen - open up a port to listen for remote tracing connections. agent - listen on a vsocket for trace clients setup-guest - create FIFOs for tracing guest VMs restore - restore the data files of a crashed run of trace-cmd record snapshot- take snapshot of running trace stack - run and display the stack tracer check-events - parse format strings for all trace events and return whether all formats are parseable convert - convert trace files attach - attach a host trace.dat file to a guest trace.dat file dump - read out the meta data from a trace file OPTIONS ------- *-h*, --help:: Display the help text. Other options see the man page for the corresponding command. SEE ALSO -------- trace-cmd-record(1), trace-cmd-report(1), trace-cmd-hist(1), trace-cmd-start(1), trace-cmd-stop(1), trace-cmd-extract(1), trace-cmd-reset(1), trace-cmd-restore(1), trace-cmd-stack(1), trace-cmd-convert(1), trace-cmd-split(1), trace-cmd-list(1), trace-cmd-listen(1), trace-cmd.dat(5), trace-cmd-check-events(1), trace-cmd-stat(1), trace-cmd-attach(1) AUTHOR ------ Written by Steven Rostedt, RESOURCES --------- https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ COPYING ------- Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under the terms of the GNU Public License (GPL). 
trace-cmd-v3.3.1/Documentation/trace-cmd/trace-cmd.dat.v6.5.txt000066400000000000000000000167751470231550600241020ustar00rootroot00000000000000TRACE-CMD.DAT.v6(5) =================== NAME ---- trace-cmd.dat.v6 - trace-cmd version 6 file format SYNOPSIS -------- *trace-cmd.dat* ignore DESCRIPTION ----------- The trace-cmd(1) utility produces a "trace.dat" file. The file may also be named anything depending if the user specifies a different output name, but it must have a certain binary format. The file is used by trace-cmd to save kernel traces into it and be able to extract the trace from it at a later point (see *trace-cmd-report(1)*). INITIAL FORMAT -------------- The first three bytes contain the magic value: 0x17 0x08 0x44 The next 7 bytes contain the characters: "tracing" The next set of characters contain a null '\0' terminated string that contains the version of the file: "6\0" The next 1 byte contains the flags for the file endianess: 0 = little endian 1 = big endian The next byte contains the number of bytes per "long" value: 4 - 32-bit long values 8 - 64-bit long values Note: This is the long size of the target's userspace. Not the kernel space size. [ Now all numbers are written in file defined endianess. ] The next 4 bytes are a 32-bit word that defines what the traced host machine page size was. HEADER INFO FORMAT ------------------ Directly after the initial format comes information about the trace headers recorded from the target box. The next 12 bytes contain the string: "header_page\0" The next 8 bytes are a 64-bit word containing the size of the page header information stored next. The next set of data is of the size read from the previous 8 bytes, and contains the data retrieved from debugfs/tracing/events/header_page. Note: The size of the second field \fBcommit\fR contains the target kernel long size. For example: field: local_t commit; offset:8; \fBsize:8;\fR signed:1; shows the kernel has a 64-bit long. 
The next 13 bytes contain the string: "header_event\0" The next 8 bytes are a 64-bit word containing the size of the event header information stored next. The next set of data is of the size read from the previous 8 bytes and contains the data retrieved from debugfs/tracing/events/header_event. This data allows the trace-cmd tool to know if the ring buffer format of the kernel made any changes. FTRACE EVENT FORMATS -------------------- Directly after the header information comes the information about the Ftrace specific events. These are the events used by the Ftrace plugins and are not enabled by the event tracing. The next 4 bytes contain a 32-bit word of the number of Ftrace event format files that are stored in the file. For the number of times defined by the previous 4 bytes is the following: 8 bytes for the size of the Ftrace event format file. The Ftrace event format file copied from the target machine: debugfs/tracing/events/ftrace//format EVENT FORMATS ------------- Directly after the Ftrace formats comes the information about the event layout. The next 4 bytes are a 32-bit word containing the number of event systems that are stored in the file. These are the directories in debugfs/tracing/events excluding the \fBftrace\fR directory. For the number of times defined by the previous 4 bytes is the following: A null-terminated string containing the system name. 4 bytes containing a 32-bit word containing the number of events within the system. For the number of times defined in the previous 4 bytes is the following: 8 bytes for the size of the event format file. The event format file copied from the target machine: debugfs/tracing/events///format KALLSYMS INFORMATION -------------------- Directly after the event formats comes the information of the mapping of function addresses to the function names. The next 4 bytes are a 32-bit word containing the size of the data holding the function mappings. 
The next set of data is of the size defined by the previous 4 bytes and contains the information from the target machine's file: /proc/kallsyms TRACE_PRINTK INFORMATION ------------------------ If a developer used trace_printk() within the kernel, it may store the format string outside the ring buffer. This information can be found in: debugfs/tracing/printk_formats The next 4 bytes are a 32-bit word containing the size of the data holding the printk formats. The next set of data is of the size defined by the previous 4 bytes and contains the information from debugfs/tracing/printk_formats. PROCESS INFORMATION ------------------- Directly after the trace_printk formats comes the information mapping a PID to a process name. The next 8 bytes contain a 64-bit word that holds the size of the data mapping the PID to a process name. The next set of data is of the size defined by the previous 8 bytes and contains the information from debugfs/tracing/saved_cmdlines. REST OF TRACE-CMD HEADER ------------------------ Directly after the process information comes the last bit of the trace.dat file header. The next 4 bytes are a 32-bit word defining the number of CPUs that were discovered on the target machine (and has matching trace data for it). The next 10 bytes are one of the following: "options \0" "latency \0" "flyrecord\0" If it is "options \0" then: The next 2 bytes are a 16-bit word defining the current option. If the value is zero then there are no more options. Otherwise, the next 4 bytes contain a 32-bit word containing the option size. If the reader does not know how to handle the option it can simply skip it. Currently there are no options defined, but this is here to extend the data. The next option will be directly after the previous option, and the options ends with a zero in the option type field. The next 10 bytes after the options are one of the following: "latency \0" "flyrecord\0" which would follow the same as if options were not present.
If the value is "latency \0", then the rest of the file is simply ASCII text that was taken from the target's: debugfs/tracing/trace If the value is "flyrecord\0", the following is present: For the number of CPUs that were read earlier, the following is present: 8 bytes that are a 64-bit word containing the offset into the file that holds the data for the CPU. 8 bytes that are a 64-bit word containing the size of the CPU data at that offset. CPU DATA -------- The CPU data is located in the part of the file that is specified in the end of the header. Padding is placed between the header and the CPU data, placing the CPU data at a page aligned (target page) position in the file. This data is copied directly from the Ftrace ring buffer and is of the same format as the ring buffer specified by the event header files loaded in the header format file. The trace-cmd tool will try to \fBmmap(2)\fR the data page by page with the target's page size if possible. If it fails to mmap, it will just read the data instead. SEE ALSO -------- trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1), trace-cmd-stop(1), trace-cmd-extract(1), trace-cmd-reset(1), trace-cmd-split(1), trace-cmd-list(1), trace-cmd-listen(1), trace-cmd.dat(5) AUTHOR ------ Written by Steven Rostedt, RESOURCES --------- https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ COPYING ------- Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under the terms of the GNU Public License (GPL). trace-cmd-v3.3.1/Documentation/trace-cmd/trace-cmd.dat.v7.5.txt000066400000000000000000000422161470231550600240700ustar00rootroot00000000000000TRACE-CMD.DAT.v7(5) =================== NAME ---- trace-cmd.dat.v7 - trace-cmd version 7 file format SYNOPSIS -------- *trace-cmd.dat* ignore DESCRIPTION ----------- The trace-cmd(1) utility produces a "trace.dat" file. 
The file may also be named anything depending if the user specifies a different output name, but it must have a certain binary format. The file is used by trace-cmd to save kernel traces into it and be able to extract the trace from it at a later point (see *trace-cmd-report(1)*). INITIAL FORMAT -------------- The first three bytes contain the magic value: 0x17 0x08 0x44 The next 7 bytes contain the characters: "tracing" The next set of characters contain a null '\0' terminated string that contains the version of the file: "7\0" The next 1 byte contains the flags for the file endianess: 0 = little endian 1 = big endian The next byte contains the number of bytes per "long" value: 4 - 32-bit long values 8 - 64-bit long values Note: This is the long size of the target's user space. Not the kernel space size. [ Now all numbers are written in file defined endianess. ] The next 4 bytes are a 32-bit word that defines what the traced host machine page size was. The compression algorithm header is written next: "name\0version\0" where "name" and "version" are strings, name and version of the compression algorithm used to compress the trace file. If the name is "none", the data in the file is not compressed. The next 8 bytes are 64-bit integer, the offset within the file where the first OPTIONS section is located. The rest of the file consists of different sections. The only mandatory is the first OPTIONS section, all others are optional. The location and the order of the sections is not strict. Each section starts with a header: FORMAT OF THE SECTION HEADER ---------------------------- <2 bytes> unsigned short integer, ID of the section. <2 bytes> unsigned short integer, section flags: 1 = the section is compressed. <4 bytes> ID of a string, description of the section. <8 bytes> long long unsigned integer, size of the section in the file. If the section is compressed, the above is the compressed size. The section must be uncompressed on reading. 
The described format of the sections refers to the uncompressed data. COMPRESSION FORMAT OF THE FILE SECTIONS --------------------------------------- Some of the sections in the file may be compressed with the compression algorithm, specified in the compression algorithm header. Compressed sections have a compression header, written after the section header and right before the compressed data: <4 bytes> unsigned int, size of compressed data in this section. <4 bytes> unsigned int, size of uncompressed data. binary compressed data, with the specified size. COMPRESSION FORMAT OF THE TRACE DATA ------------------------------------ There are two special sections, BUFFER FLYRECORD and BUFFER LATENCY, containing trace data. These sections may be compressed with the compression algorithm, specified in the compression header. Usually the size of these sections is huge, that's why its compression format is different from the other sections. The trace data is compressed in chunks. The size of one chunk is specified at file creation time. The format of compressed trace data is: <4 bytes> unsigned int, count of chunks. Follows the compressed chunks of given count. For each chunk: <4 bytes> unsigned int, size of compressed data in this chunk. <4 bytes> unsigned int, size of uncompressed data, aligned with the trace page size. binary compressed data, with the specified size. These chunks must be uncompressed on reading. The described format of trace data refers to the uncompressed data. OPTIONS SECTION --------------- Section ID: 0 This is the only mandatory section in the file. There can be multiple options sections, the first one is located at the offset specified right after the compression algorithm header. The section consists of multiple trace options, each option has the following format: <2 bytes> unsigned short integer, ID of the option. <4 bytes> unsigned integer, size of the option's data. bytes of the size specified above, data of the option.
Options, supported by the trace file version 7: DONE: id 0, size 8 This option indicates the end of the options section, it is written always as last option. The DONE option data is: <8 bytes> long long unsigned integer, offset in the trace file where the next options section is located. If this offset is 0, then there are no more options sections. DATE: id 1, size vary The DATE option data is a null terminated ASCII string, which represents the time difference between trace events timestamps and the Generic Time of Day of the system. CPUSTAT: id 2, size vary The CPUSTAT option data is a null terminated ASCII string, the content of the "per_cpu/cpu/stats" file from the trace directory. There is a CPUSTAT option for each CPU. BUFFER: id 3, size vary The BUFFER option describes the flyrecord trace data saved in the file, collected from one trace instance. There is BUFFER option for each trace instance. The format of the BUFFER data is: <8 bytes> long long unsigned integer, offset in the trace file where the BUFFER FLYRECORD section is located, containing flyrecord trace data. a null terminated ASCII string, name of the trace instance. Empty string "" is saved as name of the top instance. a null terminated ASCII string, trace clock used for events timestamps in this trace instance. <4 bytes> unsigned integer, size of the trace buffer page. <4 bytes> unsigned integer, count of the CPUs with trace data. For each CPU of the above count: <4 bytes> unsigned integer, ID of the CPU. <8 bytes> long long unsigned integer, offset in the trace file where the trace data for this CPU is located. <8 bytes> long long unsigned integer, size of the trace data for this CPU. TRACECLOCK: id 4, size vary The TRACECLOCK option data is a null terminated ASCII string, the content of the "trace_clock" file from the trace directory. UNAME: id 5, size vary The UNAME option data is a null terminated ASCII string, identifying the system where the trace data is collected. 
The string is retrieved by the uname() system call. HOOK: id 6, size vary The HOOK option data is a null terminated ASCII string, describing event hooks: custom event matching to connect any two events together. OFFSET: id 7, size vary The OFFSET option data is a null terminated ASCII string, representing a fixed time that is added to each event timestamp on reading. CPUCOUNT: id 8, size 4 The CPUCOUNT option data is: <4 bytes> unsigned integer, number of CPUs in the system. VERSION: id 9, size vary The VERSION option data is a null terminated ASCII string, representing the version of the trace-cmd application, used to collect these trace logs. PROCMAPS: id 10, size vary The PROCMAPS option data is a null terminated ASCII string, representing the memory map of each traced filtered process. The format of the string is, for each filtered process: \n \n ... separate line for each library, used by this process ... ... TRACEID: id 11, size 8 The TRACEID option data is a unique identifier of this tracing session: <8 bytes> long long unsigned integer, trace session identifier. TIME_SHIFT: id 12, size vary The TIME_SHIFT option stores time synchronization information, collected during host and guest tracing session. Usually it is saved in the guest trace file. This information is used to synchronize guest with host events timestamps, when displaying all files from this tracing session. The format of the TIME_SHIFT option data is: <8 bytes> long long unsigned integer, trace identifier of the peer (usually the host). <4 bytes> unsigned integer, flags specific to the time synchronization protocol, used in this trace session. <4 bytes> unsigned integer, number of traced CPUs. For each CPU, timestamps corrections are recorded: <4 bytes> unsigned integer, count of the recorded timestamps corrections for this CPU. 
<array of 8 byte integers>, times when the corrections are calculated <array of 8 byte integers>, corrections offsets <array of 8 byte integers>, corrections scaling ratio GUEST: id 13, size vary The GUEST option stores information about traced guests in this tracing session. Usually it is saved in the host trace file. There is a separate GUEST option for each traced guest. The information is used when displaying all files from this tracing session. The format of the GUEST option data is: <string> a null terminated ASCII string, name of the guest. <8 bytes> long long unsigned integer, trace identifier of the guest for this session. <4 bytes> unsigned integer, number of guest's CPUs. For each CPU: <4 bytes> unsigned integer, ID of the CPU. <4 bytes> unsigned integer, PID of the host task, emulating this guest CPU. TSC2NSEC: id 14, size 16 The TSC2NSEC option stores information, used to convert TSC events timestamps to nanoseconds. The format of the TSC2NSEC option data is: <4 bytes> unsigned integer, time multiplier. <4 bytes> unsigned integer, time shift. <8 bytes> long long unsigned integer, time offset. STRINGS: id 15, size vary The STRINGS option holds a list of nul terminated strings that hold the names of the other sections. HEADER_INFO: id 16, size 8 The HEADER_INFO option data is: <8 bytes> long long unsigned integer, offset into the trace file where the HEADER INFO section is located. FTRACE_EVENTS: id 17, size 8 The FTRACE_EVENTS option data is: <8 bytes> long long unsigned integer, offset into the trace file where the FTRACE EVENT FORMATS section is located. EVENT_FORMATS: id 18, size 8 The EVENT_FORMATS option data is: <8 bytes> long long unsigned integer, offset into the trace file where the EVENT FORMATS section is located. KALLSYMS: id 19, size 8 The KALLSYMS option data is: <8 bytes> long long unsigned integer, offset into the trace file where the KALLSYMS section is located. PRINTK: id 20, size 8 The PRINTK option data is: <8 bytes> long long unsigned integer, offset into the trace file where the TRACE_PRINTK section is located.
CMDLINES: id 21, size 8 The CMDLINES option data is: <8 bytes> long long unsigned integer, offset into the trace file where the SAVED COMMAND LINES section is located. BUFFER_TEXT: id 22, size vary The BUFFER_TEXT option describes the latency trace data saved in the file. The format of the BUFFER_TEXT data is: <8 bytes> long long unsigned integer, offset in the trace file where the BUFFER TEXT section is located, containing latency trace data. <string> a null terminated ASCII string, name of the trace instance. Empty string "" is saved as name of the top instance. <string> a null terminated ASCII string, trace clock used for events timestamps in this trace instance. HEADER INFO SECTION ------------------- Section ID: 16 The first 12 bytes of the section, after the section header, contain the string: "header_page\0" The next 8 bytes are a 64-bit word containing the size of the page header information stored next. The next set of data is of the size read from the previous 8 bytes, and contains the data retrieved from debugfs/tracing/events/header_page. Note: The size of the second field \fBcommit\fR contains the target kernel long size. For example: field: local_t commit; offset:8; \fBsize:8;\fR signed:1; shows the kernel has a 64-bit long. The next 13 bytes contain the string: "header_event\0" The next 8 bytes are a 64-bit word containing the size of the event header information stored next. The next set of data is of the size read from the previous 8 bytes and contains the data retrieved from debugfs/tracing/events/header_event. This data allows the trace-cmd tool to know if the ring buffer format of the kernel made any changes. FTRACE EVENT FORMATS SECTION ---------------------------- Section ID: 17 Directly after the section header comes the information about the Ftrace specific events. These are the events used by the Ftrace plugins and are not enabled by the event tracing. The next 4 bytes contain a 32-bit word of the number of Ftrace event format files that are stored in the file.
For the number of times defined by the previous 4 bytes is the following: 8 bytes for the size of the Ftrace event format file. The Ftrace event format file copied from the target machine: debugfs/tracing/events/ftrace/<event>/format EVENT FORMATS SECTION --------------------- Section ID: 18 Directly after the section header comes the information about the event layout. The next 4 bytes are a 32-bit word containing the number of event systems that are stored in the file. These are the directories in debugfs/tracing/events excluding the \fBftrace\fR directory. For the number of times defined by the previous 4 bytes is the following: A null-terminated string containing the system name. 4 bytes containing a 32-bit word containing the number of events within the system. For the number of times defined in the previous 4 bytes is the following: 8 bytes for the size of the event format file. The event format file copied from the target machine: debugfs/tracing/events/<system>/<event>/format KALLSYMS SECTION ---------------- Section ID: 19 Directly after the section header comes the information of the mapping of function addresses to the function names. The next 4 bytes are a 32-bit word containing the size of the data holding the function mappings. The next set of data is of the size defined by the previous 4 bytes and contains the information from the target machine's file: /proc/kallsyms TRACE_PRINTK SECTION -------------------- Section ID: 20 If a developer used trace_printk() within the kernel, it may store the format string outside the ring buffer. This information can be found in: debugfs/tracing/printk_formats The next 4 bytes are a 32-bit word containing the size of the data holding the printk formats. The next set of data is of the size defined by the previous 4 bytes and contains the information from debugfs/tracing/printk_formats.
SAVED COMMAND LINES SECTION --------------------------- Section ID: 21 Directly after the section header comes the information mapping a PID to a process name. The next 8 bytes contain a 64-bit word that holds the size of the data mapping the PID to a process name. The next set of data is of the size defined by the previous 8 bytes and contains the information from debugfs/tracing/saved_cmdlines. BUFFER FLYRECORD SECTION ------------------------ This section contains flyrecord tracing data, collected in one trace instance. The data is saved per CPU. Each BUFFER FLYRECORD section has a corresponding BUFFER option, containing information about saved CPU's trace data. Padding is placed between the section header and the CPU data, placing the CPU data at a page aligned (target page) position in the file. This data is copied directly from the Ftrace ring buffer and is of the same format as the ring buffer specified by the event header files loaded in the header format file. The trace-cmd tool will try to \fBmmap(2)\fR the data page by page with the target's page size if possible. If it fails to mmap, it will just read the data instead. BUFFER TEXT SECTION ------------------- Section ID: 22 This section contains latency tracing data, ASCII text taken from the target's debugfs/tracing/trace file. STRINGS SECTION --------------- Section ID: 15 All strings of the trace file metadata are stored in a string section within the file. The section contains a list of nul terminated ASCII strings. An ID of the string is used in the file metadata, which is the offset of the actual string into the string section. Strings can be stored into multiple string sections in the file.
SEE ALSO -------- trace-cmd(1), trace-cmd-record(1), trace-cmd-report(1), trace-cmd-start(1), trace-cmd-stop(1), trace-cmd-extract(1), trace-cmd-reset(1), trace-cmd-split(1), trace-cmd-list(1), trace-cmd-listen(1), trace-cmd.dat(5) AUTHOR ------ Written by Steven Rostedt, RESOURCES --------- https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ COPYING ------- Copyright \(C) 2010 Red Hat, Inc. Free use of this software is granted under the terms of the GNU Public License (GPL). trace-cmd-v3.3.1/LICENSES/000077500000000000000000000000001470231550600147535ustar00rootroot00000000000000trace-cmd-v3.3.1/LICENSES/GPL-2.0000066400000000000000000000444511470231550600156250ustar00rootroot00000000000000Valid-License-Identifier: GPL-2.0 Valid-License-Identifier: GPL-2.0-only Valid-License-Identifier: GPL-2.0+ Valid-License-Identifier: GPL-2.0-or-later SPDX-URL: https://spdx.org/licenses/GPL-2.0.html Usage-Guide: To use this license in source code, put one of the following SPDX tag/value pairs into a comment according to the placement guidelines in the licensing rules documentation. For 'GNU General Public License (GPL) version 2 only' use: SPDX-License-Identifier: GPL-2.0 or SPDX-License-Identifier: GPL-2.0-only For 'GNU General Public License (GPL) version 2 or any later version' use: SPDX-License-Identifier: GPL-2.0+ or SPDX-License-Identifier: GPL-2.0-or-later License-Text: GNU GENERAL PUBLIC LICENSE Version 2, June 1991 Copyright (C) 1989, 1991 Free Software Foundation, Inc. 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. 
This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Library General Public License instead.) You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. Finally, any free program is threatened constantly by software patents. 
We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. The precise terms and conditions for copying, distribution and modification follow. GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you". Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does. 1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program. 
You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. 
But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program. In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.) The source code for a work means the preferred form of the work for making modifications to it. 
For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. 4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it. 6. 
Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License. 7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. 
Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 9. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation. 10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. 
Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. 
Copyright (C) This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA Also add information on how to contact you by electronic and paper mail. If the program is interactive, make it output a short notice like this when it starts in an interactive mode: Gnomovision version 69, Copyright (C) year name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (which makes passes at compilers) written by James Hacker. , 1 April 1989 Ty Coon, President of Vice This General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. 
If this is what you want to do, use the GNU Library General Public License instead of this License. trace-cmd-v3.3.1/LICENSES/LGPL-2.1000066400000000000000000000654251470231550600157460ustar00rootroot00000000000000Valid-License-Identifier: LGPL-2.1 Valid-License-Identifier: LGPL-2.1+ SPDX-URL: https://spdx.org/licenses/LGPL-2.1.html Usage-Guide: To use this license in source code, put one of the following SPDX tag/value pairs into a comment according to the placement guidelines in the licensing rules documentation. For 'GNU Lesser General Public License (LGPL) version 2.1 only' use: SPDX-License-Identifier: LGPL-2.1 For 'GNU Lesser General Public License (LGPL) version 2.1 or any later version' use: SPDX-License-Identifier: LGPL-2.1+ License-Text: GNU LESSER GENERAL PUBLIC LICENSE Version 2.1, February 1999 Copyright (C) 1991, 1999 Free Software Foundation, Inc. 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. [This is the first released version of the Lesser GPL. It also counts as the successor of the GNU Library Public License, version 2, hence the version number 2.1.] Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public Licenses are intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This license, the Lesser General Public License, applies to some specially designated software packages--typically libraries--of the Free Software Foundation and other authors who decide to use it. You can use it too, but we suggest you first think carefully about whether this license or the ordinary General Public License is the better strategy to use in any particular case, based on the explanations below. When we speak of free software, we are referring to freedom of use, not price. 
Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish); that you receive source code or can get it if you want it; that you can change the software and use pieces of it in new free programs; and that you are informed that you can do these things. To protect your rights, we need to make restrictions that forbid distributors to deny you these rights or to ask you to surrender these rights. These restrictions translate to certain responsibilities for you if you distribute copies of the library or if you modify it. For example, if you distribute copies of the library, whether gratis or for a fee, you must give the recipients all the rights that we gave you. You must make sure that they, too, receive or can get the source code. If you link other code with the library, you must provide complete object files to the recipients, so that they can relink them with the library after making changes to the library and recompiling it. And you must show them these terms so they know their rights. We protect your rights with a two-step method: (1) we copyright the library, and (2) we offer you this license, which gives you legal permission to copy, distribute and/or modify the library. To protect each distributor, we want to make it very clear that there is no warranty for the free library. Also, if the library is modified by someone else and passed on, the recipients should know that what they have is not the original version, so that the original author's reputation will not be affected by problems that might be introduced by others. Finally, software patents pose a constant threat to the existence of any free program. We wish to make sure that a company cannot effectively restrict the users of a free program by obtaining a restrictive license from a patent holder. 
Therefore, we insist that any patent license obtained for a version of the library must be consistent with the full freedom of use specified in this license. Most GNU software, including some libraries, is covered by the ordinary GNU General Public License. This license, the GNU Lesser General Public License, applies to certain designated libraries, and is quite different from the ordinary General Public License. We use this license for certain libraries in order to permit linking those libraries into non-free programs. When a program is linked with a library, whether statically or using a shared library, the combination of the two is legally speaking a combined work, a derivative of the original library. The ordinary General Public License therefore permits such linking only if the entire combination fits its criteria of freedom. The Lesser General Public License permits more lax criteria for linking other code with the library. We call this license the "Lesser" General Public License because it does Less to protect the user's freedom than the ordinary General Public License. It also provides other free software developers Less of an advantage over competing non-free programs. These disadvantages are the reason we use the ordinary General Public License for many libraries. However, the Lesser license provides advantages in certain special circumstances. For example, on rare occasions, there may be a special need to encourage the widest possible use of a certain library, so that it becomes a de-facto standard. To achieve this, non-free programs must be allowed to use the library. A more frequent case is that a free library does the same job as widely used non-free libraries. In this case, there is little to gain by limiting the free library to free software only, so we use the Lesser General Public License. In other cases, permission to use a particular library in non-free programs enables a greater number of people to use a large body of free software. 
For example, permission to use the GNU C Library in non-free programs enables many more people to use the whole GNU operating system, as well as its variant, the GNU/Linux operating system. Although the Lesser General Public License is Less protective of the users' freedom, it does ensure that the user of a program that is linked with the Library has the freedom and the wherewithal to run that program using a modified version of the Library. The precise terms and conditions for copying, distribution and modification follow. Pay close attention to the difference between a "work based on the library" and a "work that uses the library". The former contains code derived from the library, whereas the latter must be combined with the library in order to run. TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License Agreement applies to any software library or other program which contains a notice placed by the copyright holder or other authorized party saying it may be distributed under the terms of this Lesser General Public License (also called "this License"). Each licensee is addressed as "you". A "library" means a collection of software functions and/or data prepared so as to be conveniently linked with application programs (which use some of those functions and data) to form executables. The "Library", below, refers to any such software library or work which has been distributed under these terms. A "work based on the Library" means either the Library or any derivative work under copyright law: that is to say, a work containing the Library or a portion of it, either verbatim or with modifications and/or translated straightforwardly into another language. (Hereinafter, translation is included without limitation in the term "modification".) "Source code" for a work means the preferred form of the work for making modifications to it. 
For a library, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the library. Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running a program using the Library is not restricted, and output from such a program is covered only if its contents constitute a work based on the Library (independent of the use of the Library in a tool for writing it). Whether that is true depends on what the Library does and what the program that uses the Library does. 1. You may copy and distribute verbatim copies of the Library's complete source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and distribute a copy of this License along with the Library. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Library or any portion of it, thus forming a work based on the Library, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) The modified work must itself be a software library. b) You must cause the files modified to carry prominent notices stating that you changed the files and the date of any change. c) You must cause the whole of the work to be licensed at no charge to all third parties under the terms of this License. 
d) If a facility in the modified Library refers to a function or a table of data to be supplied by an application program that uses the facility, other than as an argument passed when the facility is invoked, then you must make a good faith effort to ensure that, in the event an application does not supply such function or table, the facility still operates, and performs whatever part of its purpose remains meaningful. (For example, a function in a library to compute square roots has a purpose that is entirely well-defined independent of the application. Therefore, Subsection 2d requires that any application-supplied function or table used by this function must be optional: if the application does not supply it, the square root function must still compute square roots.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Library, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Library, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Library. In addition, mere aggregation of another work not based on the Library with the Library (or with a work based on the Library) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. 
You may opt to apply the terms of the ordinary GNU General Public License instead of this License to a given copy of the Library. To do this, you must alter all the notices that refer to this License, so that they refer to the ordinary GNU General Public License, version 2, instead of to this License. (If a newer version than version 2 of the ordinary GNU General Public License has appeared, then you can specify that version instead if you wish.) Do not make any other change in these notices. Once this change is made in a given copy, it is irreversible for that copy, so the ordinary GNU General Public License applies to all subsequent copies and derivative works made from that copy. This option is useful when you wish to copy part of the code of the Library into a program that is not a library. 4. You may copy and distribute the Library (or a portion or derivative of it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange. If distribution of object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place satisfies the requirement to distribute the source code, even though third parties are not compelled to copy the source along with the object code. 5. A program that contains no derivative of any portion of the Library, but is designed to work with the Library by being compiled or linked with it, is called a "work that uses the Library". Such a work, in isolation, is not a derivative work of the Library, and therefore falls outside the scope of this License. 
However, linking a "work that uses the Library" with the Library creates an executable that is a derivative of the Library (because it contains portions of the Library), rather than a "work that uses the library". The executable is therefore covered by this License. Section 6 states terms for distribution of such executables. When a "work that uses the Library" uses material from a header file that is part of the Library, the object code for the work may be a derivative work of the Library even though the source code is not. Whether this is true is especially significant if the work can be linked without the Library, or if the work is itself a library. The threshold for this to be true is not precisely defined by law. If such an object file uses only numerical parameters, data structure layouts and accessors, and small macros and small inline functions (ten lines or less in length), then the use of the object file is unrestricted, regardless of whether it is legally a derivative work. (Executables containing this object code plus portions of the Library will still fall under Section 6.) Otherwise, if the work is a derivative of the Library, you may distribute the object code for the work under the terms of Section 6. Any executables containing that work also fall under Section 6, whether or not they are linked directly with the Library itself. 6. As an exception to the Sections above, you may also combine or link a "work that uses the Library" with the Library to produce a work containing portions of the Library, and distribute that work under terms of your choice, provided that the terms permit modification of the work for the customer's own use and reverse engineering for debugging such modifications. You must give prominent notice with each copy of the work that the Library is used in it and that the Library and its use are covered by this License. You must supply a copy of this License. 
If the work during execution displays copyright notices, you must include the copyright notice for the Library among them, as well as a reference directing the user to the copy of this License. Also, you must do one of these things: a) Accompany the work with the complete corresponding machine-readable source code for the Library including whatever changes were used in the work (which must be distributed under Sections 1 and 2 above); and, if the work is an executable linked with the Library, with the complete machine-readable "work that uses the Library", as object code and/or source code, so that the user can modify the Library and then relink to produce a modified executable containing the modified Library. (It is understood that the user who changes the contents of definitions files in the Library will not necessarily be able to recompile the application to use the modified definitions.) b) Use a suitable shared library mechanism for linking with the Library. A suitable mechanism is one that (1) uses at run time a copy of the library already present on the user's computer system, rather than copying library functions into the executable, and (2) will operate properly with a modified version of the library, if the user installs one, as long as the modified version is interface-compatible with the version that the work was made with. c) Accompany the work with a written offer, valid for at least three years, to give the same user the materials specified in Subsection 6a, above, for a charge no more than the cost of performing this distribution. d) If distribution of the work is made by offering access to copy from a designated place, offer equivalent access to copy the above specified materials from the same place. e) Verify that the user has already received a copy of these materials or that you have already sent this user a copy. 
For an executable, the required form of the "work that uses the Library" must include any data and utility programs needed for reproducing the executable from it. However, as a special exception, the materials to be distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. It may happen that this requirement contradicts the license restrictions of other proprietary libraries that do not normally accompany the operating system. Such a contradiction means you cannot use both them and the Library together in an executable that you distribute. 7. You may place library facilities that are a work based on the Library side-by-side in a single library together with other library facilities not covered by this License, and distribute such a combined library, provided that the separate distribution of the work based on the Library and of the other library facilities is otherwise permitted, and provided that you do these two things: a) Accompany the combined library with a copy of the same work based on the Library, uncombined with any other library facilities. This must be distributed under the terms of the Sections above. b) Give prominent notice with the combined library of the fact that part of it is a work based on the Library, and explaining where to find the accompanying uncombined form of the same work. 8. You may not copy, modify, sublicense, link with, or distribute the Library except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense, link with, or distribute the Library is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 9. 
You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Library or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Library (or any work based on the Library), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Library or works based on it. 10. Each time you redistribute the Library (or any work based on the Library), the recipient automatically receives a license from the original licensor to copy, distribute, link with or modify the Library subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties with this License. 11. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Library at all. For example, if a patent license would not permit royalty-free redistribution of the Library by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Library. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply, and the section as a whole is intended to apply in other circumstances. 
It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 12. If the distribution and/or use of the Library is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Library under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 13. The Free Software Foundation may publish revised and/or new versions of the Lesser General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Library specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Library does not specify a license version number, you may choose any version ever published by the Free Software Foundation. 14. 
If you wish to incorporate parts of the Library into other free programs whose distribution conditions are incompatible with these, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Libraries If you develop a new library, and you want it to be of the greatest possible use to the public, we recommend making it free software that everyone can redistribute and change. 
You can do so by permitting redistribution under these terms (or, alternatively, under the terms of the ordinary General Public License). To apply these terms, attach the following notices to the library. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. one line to give the library's name and an idea of what it does. Copyright (C) year name of author This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Also add information on how to contact you by electronic and paper mail. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the library, if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the library `Frob' (a library for tweaking knobs) written by James Random Hacker. signature of Ty Coon, 1 April 1990 Ty Coon, President of Vice That's all there is to it! 
trace-cmd-v3.3.1/Makefile000066400000000000000000000427631470231550600152220ustar00rootroot00000000000000# SPDX-License-Identifier: GPL-2.0
# trace-cmd version
TC_VERSION = 3
TC_PATCHLEVEL = 3
TC_EXTRAVERSION = 1
TRACECMD_VERSION = $(TC_VERSION).$(TC_PATCHLEVEL).$(TC_EXTRAVERSION)

export TC_VERSION
export TC_PATCHLEVEL
export TC_EXTRAVERSION
export TRACECMD_VERSION

# libtracecmd carries its own version, independent of the trace-cmd
# version above.
LIBTC_VERSION = 1
LIBTC_PATCHLEVEL = 5
LIBTC_EXTRAVERSION = 2
LIBTRACECMD_VERSION = $(LIBTC_VERSION).$(LIBTC_PATCHLEVEL).$(LIBTC_EXTRAVERSION)

export LIBTC_VERSION
export LIBTC_PATCHLEVEL
export LIBTC_EXTRAVERSION
export LIBTRACECMD_VERSION

# Generated header holding the library version (see the $(VERSION_FILE) rule).
VERSION_FILE = ltc_version.h

# Minimum versions of the external tracing libraries, checked via pkg-config.
LIBTRACEEVENT_MIN_VERSION = 1.5
LIBTRACEFS_MIN_VERSION = 1.8

MAKEFLAGS += --no-print-directory

# Makefiles suck: This macro sets a default value of $(2) for the
# variable named by $(1), unless the variable has been set by
# environment or command line. This is necessary for CC and AR
# because make sets default values, so the simpler ?= approach
# won't work as expected.
define allow-override
  $(if $(or $(findstring environment,$(origin $(1))),\
            $(findstring command line,$(origin $(1)))),,\
    $(eval $(1) = $(2)))
endef

# Allow setting CC and AR, or setting CROSS_COMPILE as a prefix.
$(call allow-override,CC,$(CROSS_COMPILE)gcc)
$(call allow-override,AR,$(CROSS_COMPILE)ar)
$(call allow-override,PKG_CONFIG,pkg-config)
$(call allow-override,LD_SO_CONF_PATH,/etc/ld.so.conf.d/)
$(call allow-override,LDCONFIG,ldconfig)

export LD_SO_CONF_PATH LDCONFIG

EXT = -std=gnu99
INSTALL = install

# Use DESTDIR for installing into a different root directory.
# This is useful for building a package. The program will be
# installed in this directory as if it was the root directory.
# Then the build tool can move it later.
DESTDIR ?=
DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))'

# Run the bare token __LP64__ through the C preprocessor and keep the last
# output line; a result of 1 selects lib64, anything else selects lib.
LP64 := $(shell echo __LP64__ | ${CC} ${CFLAGS} -E -x c - | tail -n 1)
ifeq ($(LP64), 1)
  libdir_relative_temp = lib64
else
  libdir_relative_temp = lib
endif

libdir_relative ?= $(libdir_relative_temp)
prefix ?= /usr/local
bindir_relative = bin
bindir = $(prefix)/$(bindir_relative)
man_dir = $(prefix)/share/man
man_dir_SQ = '$(subst ','\'',$(man_dir))'
html_install_SQ = '$(subst ','\'',$(html_install))'
img_install_SQ = '$(subst ','\'',$(img_install))'
libdir = $(prefix)/$(libdir_relative)
libdir_SQ = '$(subst ','\'',$(libdir))'
includedir = $(prefix)/include
includedir_SQ = '$(subst ','\'',$(includedir))'
# First entry of pkg-config's own search path, unless the caller chose one.
pkgconfig_dir ?= $(word 1,$(shell $(PKG_CONFIG) \
			--variable pc_path pkg-config | tr ":" " "))

etcdir ?= /etc
etcdir_SQ = '$(subst ','\'',$(etcdir))'

export man_dir man_dir_SQ html_install html_install_SQ INSTALL
export img_install img_install_SQ libdir libdir_SQ includedir_SQ
export DESTDIR DESTDIR_SQ

ifeq ($(prefix),$(HOME))
plugin_tracecmd_dir = $(libdir)/trace-cmd/plugins
python_dir ?= $(libdir)/trace-cmd/python
var_dir = $(HOME)/.trace-cmd/
else
# plugin_tracecmd_dir is expanded by PLUGIN_DIR_TRACECMD and
# plugin_tracecmd_dir_SQ below. It used to be assigned only in the
# $(HOME) branch, which left both of them empty for every other prefix.
# "?=" keeps any value a caller already provides.
plugin_tracecmd_dir ?= $(libdir)/trace-cmd/plugins
python_dir ?= $(libdir)/trace-cmd/python
PLUGIN_DIR_TRACECMD = -DPLUGIN_TRACECMD_DIR="$(plugin_tracecmd_dir)"
PYTHON_DIR = -DPYTHON_DIR="$(python_dir)"
PLUGIN_DIR_TRACECMD_SQ = '$(subst ','\'',$(PLUGIN_DIR_TRACECMD))'
PYTHON_DIR_SQ = '$(subst ','\'',$(PYTHON_DIR))'
var_dir = /var
endif

# Shell quotes
bindir_SQ = $(subst ','\'',$(bindir))
bindir_relative_SQ = $(subst ','\'',$(bindir_relative))
plugin_tracecmd_dir_SQ = $(subst ','\'',$(plugin_tracecmd_dir))
python_dir_SQ = $(subst ','\'',$(python_dir))

# A literal '#', for use inside $(shell ...) snippets.
pound := \#

VAR_DIR = -DVAR_DIR="$(var_dir)"
VAR_DIR_SQ = '$(subst ','\'',$(VAR_DIR))'
var_dir_SQ = '$(subst ','\'',$(var_dir))'

HELP_DIR = -DHELP_DIR=$(html_install)
HELP_DIR_SQ = '$(subst ','\'',$(HELP_DIR))'
#' emacs highlighting gets confused by the above escaped quote.
BASH_COMPLETE_DIR ?= $(etcdir)/bash_completion.d

export PLUGIN_DIR_TRACECMD
export PYTHON_DIR
export PYTHON_DIR_SQ
export plugin_tracecmd_dir_SQ
export python_dir_SQ
export var_dir

# copy a bit from Linux kbuild

ifeq ("$(origin V)", "command line")
  VERBOSE = $(V)
endif
ifndef VERBOSE
  VERBOSE = 0
endif

# 1 when the single-letter flags in MAKEFLAGS contain "s", empty otherwise.
SILENT := $(if $(findstring s,$(filter-out --%,$(MAKEFLAGS))),1)

# Exactly "0" when the shell cannot find swig; python support is then
# disabled and the report_noswig target is built in its place.
SWIG_DEFINED := $(shell if command -v swig; then echo 1; else echo 0; fi)
ifeq ($(SWIG_DEFINED), 0)
BUILD_PYTHON := report_noswig
NO_PYTHON = 1
endif

ifndef NO_PYTHON
PYTHON := ctracecmd.so

PYTHON_VERS ?= python3
PYTHON_PKGCONFIG_VERS ?= $(PYTHON_VERS)

# Can build python?
ifeq ($(shell sh -c "$(PKG_CONFIG) --cflags $(PYTHON_PKGCONFIG_VERS) > /dev/null 2>&1 && echo y"), y)
  BUILD_PYTHON := $(PYTHON)
  BUILD_PYTHON_WORKS := 1
else
  BUILD_PYTHON := report_nopythondev
  NO_PYTHON = 1
endif
endif # NO_PYTHON
export BUILD_PYTHON_WORKS
export NO_PYTHON

# $(call test-build, snippet, ret) -> ret if snippet compiles
#                                  -> empty otherwise
test-build = $(if $(shell sh -c 'echo "$(1)" | \
	$(CC) -o /dev/null -x c - > /dev/null 2>&1 && echo y'), $2)

# The snippet must name the header: a bare "#include" never compiles, which
# would force NO_UDIS86 on every system.
UDIS86_AVAILABLE := $(call test-build,\#include <udis86.h>, y)
ifneq ($(strip $(UDIS86_AVAILABLE)), y)
NO_UDIS86 := 1
endif

ifndef NO_UDIS86
# have udis86 disassembler library?
udis86-flags := -DHAVE_UDIS86 -ludis86
udis86-ldflags := -ludis86
endif # NO_UDIS86

define BLK_TC_FLUSH_SOURCE
#include <linux/blktrace_api.h>
int main(void) { return BLK_TC_FLUSH; }
endef

# have flush/fua block layer instead of barriers?
blk-flags := $(call test-build,$(BLK_TC_FLUSH_SOURCE),-DHAVE_BLK_TC_FLUSH)

define MEMFD_CREATE_SOURCE
#define _GNU_SOURCE
#include <sys/mman.h>
int main(void) { return memfd_create(\"test\", 0); }
endef

# have memfd_create available
memfd-flags := $(call test-build,$(MEMFD_CREATE_SOURCE),-DHAVE_MEMFD_CREATE)

# O=<dir> on the command line selects the output directory; it must exist.
ifeq ("$(origin O)", "command line")

  saved-output := $(O)
  BUILD_OUTPUT := $(shell cd $(O) && /bin/pwd)
  $(if $(BUILD_OUTPUT),, \
    $(error output directory "$(saved-output)" does not exist))

else
  BUILD_OUTPUT = $(CURDIR)
endif

srctree := $(if $(BUILD_SRC),$(BUILD_SRC),$(CURDIR))
objtree := $(BUILD_OUTPUT)
src := $(srctree)
obj := $(objtree)

PKG_CONFIG_SOURCE_FILE = libtracecmd.pc
PKG_CONFIG_FILE := $(addprefix $(BUILD_OUTPUT)/,$(PKG_CONFIG_SOURCE_FILE))

export pkgconfig_dir PKG_CONFIG_FILE

export prefix bindir src obj

LIBS ?= -ldl

LIBTRACECMD_DIR = $(obj)/lib/trace-cmd
LIBTRACECMD_STATIC = $(LIBTRACECMD_DIR)/libtracecmd.a
LIBTRACECMD_SHARED = $(LIBTRACECMD_DIR)/libtracecmd.so.$(LIBTRACECMD_VERSION)

# The sed expressions cut the shared library name back to ".so.<major>"
# and to plain ".so" respectively.
LIBTRACECMD_SHARED_VERSION := $(shell echo $(LIBTRACECMD_SHARED) | sed -e 's/\(\.so\.[0-9]*\).*/\1/')
LIBTRACECMD_SHARED_SO := $(shell echo $(LIBTRACECMD_SHARED) | sed -e 's/\(\.so\).*/\1/')

export LIBTRACECMD_STATIC LIBTRACECMD_SHARED
export LIBTRACECMD_SHARED_VERSION LIBTRACECMD_SHARED_SO

LIBTRACEEVENT=libtraceevent
LIBTRACEFS=libtracefs

TEST_LIBTRACEEVENT := $(shell sh -c "$(PKG_CONFIG) --atleast-version $(LIBTRACEEVENT_MIN_VERSION) $(LIBTRACEEVENT) > /dev/null 2>&1 && echo y")
TEST_LIBTRACEFS := $(shell sh -c "$(PKG_CONFIG) --atleast-version $(LIBTRACEFS_MIN_VERSION) $(LIBTRACEFS) > /dev/null 2>&1 && echo y")

# NOTE(review): when a library check fails, the "warning" rule below is the
# first rule in this file, so it presumably becomes the default goal unless
# an included makefile sets one -- confirm.
ifeq ("$(TEST_LIBTRACEEVENT)", "y")
LIBTRACEEVENT_CFLAGS := $(shell sh -c "$(PKG_CONFIG) --cflags $(LIBTRACEEVENT)")
LIBTRACEEVENT_LDLAGS := $(shell sh -c "$(PKG_CONFIG) --libs $(LIBTRACEEVENT)")
else
.PHONY: warning
warning:
	@echo "********************************************"
	@echo "** NOTICE: libtraceevent version $(LIBTRACEEVENT_MIN_VERSION) or higher not found on system"
	@echo "**"
	@echo "** Consider installing the latest libtraceevent from your"
	@echo "** distribution, or from source:"
	@echo "**"
	@echo "** https://git.kernel.org/pub/scm/libs/libtrace/libtraceevent.git/ "
	@echo "**"
	@echo "********************************************"
endif

export LIBTRACEEVENT_CFLAGS LIBTRACEEVENT_LDLAGS

ifeq ("$(TEST_LIBTRACEFS)", "y")
LIBTRACEFS_CFLAGS := $(shell sh -c "$(PKG_CONFIG) --cflags $(LIBTRACEFS)")
LIBTRACEFS_LDLAGS := $(shell sh -c "$(PKG_CONFIG) --libs $(LIBTRACEFS)")
else
.PHONY: warning
warning:
	@echo "********************************************"
	@echo "** NOTICE: libtracefs version $(LIBTRACEFS_MIN_VERSION) or higher not found on system"
	@echo "**"
	@echo "** Consider installing the latest libtracefs from your"
	@echo "** distribution, or from source:"
	@echo "**"
	@echo "** https://git.kernel.org/pub/scm/libs/libtrace/libtracefs.git/ "
	@echo "**"
	@echo "********************************************"
endif

export LIBTRACEFS_CFLAGS LIBTRACEFS_LDLAGS

TRACE_LIBS = -L$(LIBTRACECMD_DIR) -ltracecmd \
	     $(LIBTRACEEVENT_LDLAGS) $(LIBTRACEFS_LDLAGS)

export LIBS TRACE_LIBS
export LIBTRACECMD_DIR
export Q SILENT VERBOSE EXT

# Include the utils
include scripts/utils.mk

INCLUDES = -I$(src)/include -I$(src)/../../include
INCLUDES += -I$(src)/include/trace-cmd
INCLUDES += -I$(src)/lib/trace-cmd/include
INCLUDES += -I$(src)/lib/trace-cmd/include/private
INCLUDES += -I$(src)/tracecmd/include
INCLUDES += $(LIBTRACEEVENT_CFLAGS)
INCLUDES += $(LIBTRACEFS_CFLAGS)

include $(src)/features.mk

# Set compile option CFLAGS if not set elsewhere
CFLAGS ?= -g -Wall
CPPFLAGS ?=
LDFLAGS ?=

# Each probe below pipes a one-line "#include <header>" snippet to the
# compiler; the header name is required for the probe to mean anything.
# NOTE(review): the vsock and perf header names are the kernel UAPI headers
# matching the -DVSOCK / -DPERF flags -- confirm against the project tree.
ifndef NO_VSOCK
VSOCK_DEFINED := $(shell if (echo "$(pound)include <linux/vm_sockets.h>" | $(CC) -E - >/dev/null 2>&1) ; then echo 1; else echo 0 ; fi)
else
VSOCK_DEFINED := 0
endif

export VSOCK_DEFINED
ifeq ($(VSOCK_DEFINED), 1)
CFLAGS += -DVSOCK
endif

PERF_DEFINED := $(shell if (echo "$(pound)include <linux/perf_event.h>" | $(CC) -E - >/dev/null 2>&1) ; then echo 1; else echo 0 ; fi)
export PERF_DEFINED
ifeq ($(PERF_DEFINED), 1)
CFLAGS += -DPERF
endif

ZLIB_INSTALLED := $(shell if (printf "$(pound)include <zlib.h>\n void main(){deflateInit(NULL, Z_BEST_COMPRESSION);}" | $(CC) -o /dev/null -x c - -lz >/dev/null 2>&1) ; then echo 1; else echo 0 ; fi)
ifeq ($(ZLIB_INSTALLED), 1)
export ZLIB_INSTALLED
ZLIB_LDLAGS = -lz
CFLAGS += -DHAVE_ZLIB
$(info Have zlib compression support)
endif

export ZLIB_LDLAGS

ifndef NO_LIBZSTD
TEST_LIBZSTD := $(shell sh -c "$(PKG_CONFIG) --atleast-version 1.4.0 libzstd > /dev/null 2>&1 && echo y")

ifeq ("$(TEST_LIBZSTD)", "y")
LIBZSTD_CFLAGS := $(shell sh -c "$(PKG_CONFIG) --cflags libzstd")
LIBZSTD_LDLAGS := $(shell sh -c "$(PKG_CONFIG) --libs libzstd")
CFLAGS += -DHAVE_ZSTD
ZSTD_INSTALLED=1
$(info Have ZSTD compression support)
else
$(info *************************************************************)
$(info ZSTD package not found, best compression algorithm not in use)
$(info *************************************************************)
endif

export LIBZSTD_CFLAGS LIBZSTD_LDLAGS ZSTD_INSTALLED
endif

CUNIT_INSTALLED := $(shell if (printf "$(pound)include <CUnit/Basic.h>\n void main(){CU_initialize_registry();}" | $(CC) -o /dev/null -x c - -lcunit >/dev/null 2>&1) ; then echo 1; else echo 0 ; fi)
export CUNIT_INSTALLED

export CFLAGS
export INCLUDES

# Required CFLAGS
override CFLAGS += -D_GNU_SOURCE

# Make sure 32 bit stat() works on large file systems
override CFLAGS += -D_FILE_OFFSET_BITS=64

# try-cc, SOURCE_PTRACE and SOURCE_AUDIT are not defined in this file;
# presumably they come from the makefiles included above.
ifndef NO_PTRACE
ifneq ($(call try-cc,$(SOURCE_PTRACE),),y)
  NO_PTRACE = 1
  override CFLAGS += -DWARN_NO_PTRACE
endif
endif

ifdef NO_PTRACE
override CFLAGS += -DNO_PTRACE
endif

ifndef NO_AUDIT
ifneq ($(call try-cc,$(SOURCE_AUDIT),-laudit),y)
  NO_AUDIT = 1
  override CFLAGS += -DWARN_NO_AUDIT
endif
endif

ifdef NO_AUDIT
override CFLAGS += -DNO_AUDIT
else
LIBS += -laudit
endif

# Append required CFLAGS
override CFLAGS += $(INCLUDES) $(VAR_DIR)
override CFLAGS += $(PLUGIN_DIR_TRACECMD_SQ)
override CFLAGS += $(udis86-flags) $(blk-flags) $(memfd-flags)
override LDFLAGS += $(udis86-ldflags)
CMD_TARGETS = trace-cmd $(BUILD_PYTHON)

###
# Default we just build trace-cmd
#
# If you want all libraries, then do: make libs
###

all: all_cmd plugins show_other_make

all_cmd: $(CMD_TARGETS)

BUILD_PREFIX := $(BUILD_OUTPUT)/build_prefix

$(BUILD_PREFIX): force
	$(Q)$(call build_prefix,$(prefix))

$(PKG_CONFIG_FILE) : ${PKG_CONFIG_SOURCE_FILE}.template $(BUILD_PREFIX) $(VERSION_FILE)
	$(Q) $(call do_make_pkgconfig_file,$(prefix))

trace-cmd: force $(LIBTRACECMD_STATIC) \
	force $(obj)/lib/trace-cmd/plugins/tracecmd_plugin_dir
	$(Q)$(MAKE) -C $(src)/tracecmd $(obj)/tracecmd/$@

$(LIBTRACECMD_STATIC): force
	$(Q)$(MAKE) -C $(src)/lib/trace-cmd $@

$(LIBTRACECMD_SHARED): force
	$(Q)$(MAKE) -C $(src)/lib/trace-cmd libtracecmd.so

libtracecmd.a: $(LIBTRACECMD_STATIC)
libtracecmd.so: $(LIBTRACECMD_SHARED)

libs: $(LIBTRACECMD_SHARED) $(PKG_CONFIG_FILE)

VERSION = $(LIBTC_VERSION)
PATCHLEVEL = $(LIBTC_PATCHLEVEL)
EXTRAVERSION = $(LIBTC_EXTRAVERSION)

define make_version.h
  (echo '/* This file is automatically generated. Do not modify. */'; \
   echo \#define VERSION_CODE $(shell \
   expr $(VERSION) \* 256 + $(PATCHLEVEL)); \
   echo '#define EXTRAVERSION ' $(EXTRAVERSION); \
   echo '#define VERSION_STRING "'$(VERSION).$(PATCHLEVEL).$(EXTRAVERSION)'"'; \
  ) > $1
endef

define update_version.h
  ($(call make_version.h, $@.tmp); \
    if [ -r $@ ] && cmp -s $@ $@.tmp; then \
      rm -f $@.tmp; \
    else \
      echo ' UPDATE $@'; \
      mv -f $@.tmp $@; \
    fi);
endef

# The header is regenerated into a temporary file on every run and only
# replaces the existing one when the contents differ.
$(VERSION_FILE): force
	$(Q)$(call update_version.h)

gui: force
	@echo "***************************"
	@echo " KernelShark has moved!"
	@echo " Please use its new home at https://git.kernel.org/pub/scm/utils/trace-cmd/kernel-shark.git/"
	@echo "***************************"

# The $(error) line is part of the recipe, so it is only expanded when the
# "test" target is actually run.
test: force trace-cmd
ifneq ($(CUNIT_INSTALLED),1)
	$(error CUnit framework not installed, cannot build unit tests)
endif
	$(Q)$(MAKE) -C $(src)/utest $@

test_mem: force test
	$(Q)$(MAKE) -C $(src)/utest $@

plugins_tracecmd: force $(obj)/lib/trace-cmd/plugins/tracecmd_plugin_dir
	$(Q)$(MAKE) -C $(src)/lib/trace-cmd/plugins

plugins: plugins_tracecmd

$(obj)/lib/trace-cmd/plugins/tracecmd_plugin_dir: force
	$(Q)$(MAKE) -C $(src)/lib/trace-cmd/plugins $@

show_other_make:
	@echo "Note: to build man pages, type \"make doc\""
	@echo " to build unit tests, type \"make test\""

PHONY += show_other_make

define find_tag_files
find . -name '\.pc' -prune -o -name '*\.[ch]' -print -o -name '*\.[ch]pp' \
	! -name '\.#' -print
endef

tags: force
	$(RM) tags
	$(call find_tag_files) | xargs ctags --extra=+f --c-kinds=+px

TAGS: force
	$(RM) TAGS
	$(call find_tag_files) | xargs etags

cscope: force
	$(RM) cscope*
	$(call find_tag_files) > cscope.files
	cscope -b -q -f cscope.out

install_plugins_tracecmd: force
	$(Q)$(MAKE) -C $(src)/lib/trace-cmd/plugins install_plugins

install_plugins: install_plugins_tracecmd

install_python: force
	$(Q)$(MAKE) -C $(src)/python $@

install_bash_completion: force
	$(Q)$(call do_install_data,$(src)/tracecmd/trace-cmd.bash,$(BASH_COMPLETE_DIR))

install_cmd: all_cmd install_plugins install_python install_bash_completion
	$(Q)$(call do_install,$(obj)/tracecmd/trace-cmd,$(bindir_SQ))

install: install_cmd
	@echo "Note: to install man pages, type \"make install_doc\""

install_gui: force
	@echo "Nothing to do here."
	@echo " Have you tried https://git.kernel.org/pub/scm/utils/trace-cmd/kernel-shark.git/"

install_libs: libs
	$(Q)$(MAKE) -C $(src)/lib/trace-cmd/ $@

doc: check_doc
	$(MAKE) -C $(src)/Documentation all

doc_clean:
	$(MAKE) -C $(src)/Documentation clean

install_doc:
	$(MAKE) -C $(src)/Documentation install

check_doc: force
	$(Q)$(src)/check-manpages.sh $(src)/Documentation/libtracecmd

clean: clean_meson
	$(RM) *.o *~ *.a *.so .*.d
	$(RM) tags TAGS cscope* $(PKG_CONFIG_SOURCE_FILE) $(VERSION_FILE)
	$(MAKE) -C $(src)/lib/trace-cmd clean
	$(MAKE) -C $(src)/lib/trace-cmd/plugins clean
	$(MAKE) -C $(src)/utest clean
	$(MAKE) -C $(src)/python clean
	$(MAKE) -C $(src)/tracecmd clean

# $(call build_uninstall_script,<install target>,<list suffix>)
# Runs <install target> with DESTDIR pointing at a scratch directory, then
# records every non-directory path found there in
# $(BUILD_OUTPUT)/build_<list suffix> and removes the scratch directory.
define build_uninstall_script
	$(Q)mkdir $(BUILD_OUTPUT)/tmp_build
	$(Q)$(MAKE) -C $(src) DESTDIR=$(BUILD_OUTPUT)/tmp_build O=$(BUILD_OUTPUT) $1 > /dev/null
	$(Q)find $(BUILD_OUTPUT)/tmp_build ! -type d -printf "%P\n" > $(BUILD_OUTPUT)/build_$2
	$(Q)$(RM) -rf $(BUILD_OUTPUT)/tmp_build
endef

build_uninstall: $(BUILD_PREFIX)
	$(call build_uninstall_script,install,uninstall)

$(BUILD_OUTPUT)/build_uninstall: build_uninstall

build_libs_uninstall: $(BUILD_PREFIX)
	$(call build_uninstall_script,install_libs,libs_uninstall)

$(BUILD_OUTPUT)/build_libs_uninstall: build_libs_uninstall

# $(call uninstall_file,<path relative to DESTDIR>)
# Removes the path when it is a regular file or a symbolic link.
define uninstall_file
	if [ -f $(DESTDIR)/$1 -o -h $(DESTDIR)/$1 ]; then \
		$(call print_uninstall,$(DESTDIR)/$1)$(RM) $(DESTDIR)/$1; \
	fi;
endef

uninstall: $(BUILD_OUTPUT)/build_uninstall
	@$(foreach file,$(shell cat $(BUILD_OUTPUT)/build_uninstall),$(call uninstall_file,$(file)))

uninstall_libs: $(BUILD_OUTPUT)/build_libs_uninstall
	@$(foreach file,$(shell cat $(BUILD_OUTPUT)/build_libs_uninstall),$(call uninstall_file,$(file)))

##### PYTHON STUFF #####

report_noswig: force
	$(Q)echo
	$(Q)echo " NO_PYTHON forced: swig not installed, not compiling python plugins"
	$(Q)echo

report_nopythondev: force
	$(Q)echo
	$(Q)echo " python-dev is not installed, not compiling python plugins"
	$(Q)echo

ifndef NO_PYTHON
PYTHON_INCLUDES = `$(PKG_CONFIG) --cflags 
$(PYTHON_PKGCONFIG_VERS)` PYTHON_LDFLAGS = `$(PKG_CONFIG) --libs $(PYTHON_PKGCONFIG_VERS)` \ $(shell $(PYTHON_VERS)-config --ldflags) PYGTK_CFLAGS = `$(PKG_CONFIG) --cflags pygtk-2.0` else PYTHON_INCLUDES = PYTHON_LDFLAGS = PYGTK_CFLAGS = endif export PYTHON_INCLUDES export PYTHON_LDFLAGS export PYGTK_CFLAGS ctracecmd.so: force $(LIBTRACECMD_STATIC) $(Q)$(MAKE) -C $(src)/python $@ PHONY += python python: $(PYTHON) meson: $(MAKE) -f Makefile.meson meson_install: $(MAKE) -f Makefile.meson install meson_docs: $(MAKE) -f Makefile.meson docs PHONY += clean_meson clean_meson: $(Q)$(MAKE) -f Makefile.meson $@ dist: git archive --format=tar --prefix=trace-cmd-$(TRACECMD_VERSION)/ HEAD \ > ../trace-cmd-$(TRACECMD_VERSION).tar cat ../trace-cmd-$(TRACECMD_VERSION).tar | \ bzip2 -c9 > ../trace-cmd-$(TRACECMD_VERSION).tar.bz2 cat ../trace-cmd-$(TRACECMD_VERSION).tar | \ xz -e -c8 > ../trace-cmd-$(TRACECMD_VERSION).tar.xz PHONY += force force: # Declare the contents of the .PHONY variable as phony. We keep that # information in a variable so we can use it in if_changed and friends. .PHONY: $(PHONY) trace-cmd-v3.3.1/Makefile.meson000066400000000000000000000017121470231550600163270ustar00rootroot00000000000000# SPDX-License-Identifier: GPL-2.0 undefine CFLAGS # Makefiles suck: This macro sets a default value of $(2) for the # variable named by $(1), unless the variable has been set by # environment or command line. This is necessary for CC and AR # because make sets default values, so the simpler ?= approach # won't work as expected. 
define allow-override
  $(if $(or $(findstring environment,$(origin $(1))),\
            $(findstring command line,$(origin $(1)))),,\
    $(eval $(1) = $(2)))
endef

# MESON and MESON_BUILD_DIR keep a value given via the environment or the
# command line; otherwise they fall back to the defaults below.
$(call allow-override,MESON,meson)
$(call allow-override,MESON_BUILD_DIR,build)


all: compile

# "force" is a prerequisite so that meson is always invoked for compile.
PHONY += compile
compile: $(MESON_BUILD_DIR) force
	$(MESON) compile -C $(MESON_BUILD_DIR)

# Configure the build directory.
$(MESON_BUILD_DIR):
	$(MESON) setup --prefix=$(prefix) $(MESON_BUILD_DIR)

install: compile
	$(MESON) install -C $(MESON_BUILD_DIR)

# Build the "docs" target in the same directory every other rule uses, so
# that overriding MESON_BUILD_DIR also works for the documentation.
docs: $(MESON_BUILD_DIR)
	$(MESON) compile -C $(MESON_BUILD_DIR) docs

PHONY += clean_meson
clean_meson:
	$(Q)$(RM) -rf $(MESON_BUILD_DIR)

PHONY += force
force:
trace-cmd-v3.3.1/PACKAGING000066400000000000000000000020621470231550600147550ustar00rootroot00000000000000The libtracefs and libtraceevent packages are required for trace-cmd
and libtracecmd.so

In order to create a package directory with libtraceevent, libtracefs
and libtracecmd and trace-cmd, you can follow these steps:

 git clone git://git.kernel.org/pub/scm/libs/libtrace/libtraceevent.git
 git clone git://git.kernel.org/pub/scm/libs/libtrace/libtracefs.git
 git clone git://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git

 cd libtraceevent
 INSTALL_PATH=/tmp/install ../trace-cmd/make-trace-cmd.sh install

 cd ../libtracefs
 INSTALL_PATH=/tmp/install ../trace-cmd/make-trace-cmd.sh install

 cd ../trace-cmd
 INSTALL_PATH=/tmp/install ./make-trace-cmd.sh install install_libs

 cd /tmp/install
 tar cvjf /tmp/trace-cmd-files.tar.bz2 .

And then the tarball of /tmp/trace-cmd-files.tar.bz2 can be extracted
on another machine at the root directory, and trace-cmd will be installed there.
Note, to define a prefix, add a PREFIX variable before calling make-trace-cmd.sh For example: PREFIX=/usr/local INSTALL_PATH=/tmp/install ./make-trace-cmd.sh install trace-cmd-v3.3.1/README000066400000000000000000000040211470231550600144230ustar00rootroot00000000000000 For more information on contributing please see: https://www.trace-cmd.org Note: The official repository for trace-cmd and KernelShark is here: git://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git For bug reports and issues, please file them here: https://bugzilla.kernel.org/buglist.cgi?component=Trace-cmd%2FKernelshark&product=Tools&resolution=--- These files make up the code that creates the trace-cmd programs. This includes the GUI interface application kernelshark as well as trace-graph and trace-view. These files also make up the code to create the libtracecmd library. The applications are licensed under the GNU General Public License 2.0 (see COPYING) and the libraries are licensed under the GNU Lesser General Public License 2.1 (See COPYING.LIB).
BUILDING: In order to install build dependencies on Debian / Ubuntu do the following: sudo apt-get install build-essential git pkg-config -y sudo apt-get install libtracefs-dev libtraceevent-dev -y In order to install build dependencies on Fedora, as root do the following: dnf install gcc make git pkg-config -y dnf install libtracefs-devel libtraceevent-devel -y In case your distribution does not have the required libtracefs and libtraceevent libraries, build and install them manually: git clone https://git.kernel.org/pub/scm/libs/libtrace/libtraceevent.git/ cd libtraceevent make sudo make install git clone https://git.kernel.org/pub/scm/libs/libtrace/libtracefs.git/ cd libtracefs make sudo make install To make trace-cmd make To make the gui make gui INSTALL: To install trace-cmd make install To install the gui make install_gui To install libtracecmd libraries make install_libs Note: The default install is relative to /usr/local The default install directory is /usr/local/bin The default plugin directory is /usr/local/lib/trace-cmd/plugins To change the default, you can set 'prefix', eg mkdir $HOME/test-trace make prefix=$HOME/test-trace make prefix=$HOME/test-trace install trace-cmd-v3.3.1/check-manpages.sh000077500000000000000000000026641470231550600167630ustar00rootroot00000000000000#!/bin/bash # SPDX-License-Identifier: LGPL-2.1 # Copyright (C) 2022, Google Inc, Steven Rostedt # # This checks if any function is listed in a man page that is not listed # in the main man page. 
if [ $# -lt 1 ]; then echo "usage: check-manpages man-page-path" exit 1 fi cd $1 MAIN=libtracecmd MAIN_FILE=${MAIN}.txt PROCESSED="" # Ignore man pages that do not contain functions IGNORE="" for man in ${MAIN}-*.txt; do for a in `sed -ne '/^NAME/,/^SYNOP/{/^[a-z]/{s/, *$//;s/,/\n/g;s/ //g;s/-.*$/-/;/-/{s/-//p;q};p}}' $man`; do if [ "${PROCESSED/:${a} /}" != "${PROCESSED}" ]; then P="${PROCESSED/:${a} */}" echo "Found ${a} in ${man} and in ${P/* /}" fi PROCESSED="${man}:${a} ${PROCESSED}" if [ "${IGNORE/$man/}" != "${IGNORE}" ]; then continue fi if ! grep -q '\*'${a}'\*' $MAIN_FILE; then if [ "$last" == "" ]; then echo fi if [ "$last" != "$man" ]; then echo "Missing functions from $MAIN_FILE that are in $man" last=$man fi echo " ${a}" fi done done DEPRECATED="" sed -ne 's/^[a-z].*[ \*]\([a-z_][a-z_]*\)(.*/\1/p' -e 's/^\([a-z_][a-z_]*\)(.*/\1/p' ../../include/trace-cmd/trace-cmd.h | while read f; do if ! grep -q '\*'${f}'\*' $MAIN_FILE; then if [ "${DEPRECATED/\*$f\*/}" != "${DEPRECATED}" ]; then continue; fi if [ "$last" == "" ]; then echo echo "Missing functions from $MAIN_FILE that are in tracefs.h" last=$f fi echo " ${f}" fi done trace-cmd-v3.3.1/features.mk000066400000000000000000000023461470231550600157220ustar00rootroot00000000000000# SPDX-License-Identifier: GPL-2.0 # taken from perf which was based on Linux Kbuild # try-cc # Usage: option = $(call try-cc, source-to-build, cc-options) try-cc = $(shell sh -c \ 'TMP="$(BUILD_OUTPUT)$(TMPOUT).$$$$"; \ echo "$(1)" | \ $(CC) -x c - $(2) -o "$$TMP" > /dev/null 2>&1 && echo y; \ rm -f "$$TMP"') define SOURCE_PTRACE #include #include int main (void) { int ret; ret = ptrace(PTRACE_ATTACH, 0, NULL, 0); ptrace(PTRACE_TRACEME, 0, NULL, 0); ptrace(PTRACE_GETSIGINFO, 0, NULL, NULL); ptrace(PTRACE_GETEVENTMSG, 0, NULL, NULL); ptrace(PTRACE_SETOPTIONS, NULL, NULL, PTRACE_O_TRACEFORK | PTRACE_O_TRACEVFORK | PTRACE_O_TRACECLONE | PTRACE_O_TRACEEXIT); ptrace(PTRACE_CONT, NULL, NULL, 0); ptrace(PTRACE_DETACH, 0, NULL, NULL); 
ptrace(PTRACE_SETOPTIONS, 0, NULL, PTRACE_O_TRACEFORK | PTRACE_O_TRACEVFORK | PTRACE_O_TRACECLONE | PTRACE_O_TRACEEXIT); return ret; } endef define SOURCE_AUDIT #include #include int main (void) { char *name; int ret; ret = audit_detect_machine(); if (ret < 0) return ret; name = audit_syscall_to_name(1, ret); if (!name) return -1; return ret; } endef trace-cmd-v3.3.1/include/000077500000000000000000000000001470231550600151715ustar00rootroot00000000000000trace-cmd-v3.3.1/include/linux/000077500000000000000000000000001470231550600163305ustar00rootroot00000000000000trace-cmd-v3.3.1/include/linux/time64.h000066400000000000000000000005341470231550600176130ustar00rootroot00000000000000/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _TOOLS_LINUX_TIME64_H #define _TOOLS_LINUX_TIME64_H #define MSEC_PER_SEC 1000L #define USEC_PER_MSEC 1000L #define NSEC_PER_USEC 1000L #define NSEC_PER_MSEC 1000000L #define USEC_PER_SEC 1000000L #define NSEC_PER_SEC 1000000000L #define FSEC_PER_SEC 1000000000000000LL #endif /* _LINUX_TIME64_H */ trace-cmd-v3.3.1/include/trace-cmd/000077500000000000000000000000001470231550600170305ustar00rootroot00000000000000trace-cmd-v3.3.1/include/trace-cmd/trace-cmd.h000066400000000000000000000105731470231550600210460ustar00rootroot00000000000000/* SPDX-License-Identifier: LGPL-2.1 */ /* * Copyright (C) 2008, 2009, 2010 Red Hat Inc, Steven Rostedt * */ #ifndef _TRACE_CMD_H #define _TRACE_CMD_H #include "event-parse.h" #include "tracefs.h" #ifdef __cplusplus extern "C" { #endif struct tracecmd_input; enum tracecmd_open_flags { TRACECMD_FL_LOAD_NO_PLUGINS = 1 << 0, /* Do not load plugins */ TRACECMD_FL_LOAD_NO_SYSTEM_PLUGINS = 1 << 1, /* Do not load system plugins */ }; enum tracecmd_section_flags { TRACECMD_SEC_FL_COMPRESS = 1 << 0, /* the section is compressed */ }; struct tracecmd_input *tracecmd_open_head(const char *file, int flags); struct tracecmd_input *tracecmd_open(const char *file, int flags); struct tracecmd_input *tracecmd_open_fd(int fd, int 
flags); void tracecmd_close(struct tracecmd_input *handle); int tracecmd_init_data(struct tracecmd_input *handle); struct tep_record * tracecmd_read_cpu_first(struct tracecmd_input *handle, int cpu); struct tep_record * tracecmd_read_data(struct tracecmd_input *handle, int cpu); struct tep_record * tracecmd_read_at(struct tracecmd_input *handle, unsigned long long offset, int *cpu); void tracecmd_free_record(struct tep_record *record); struct tep_handle *tracecmd_get_tep(struct tracecmd_input *handle); unsigned long long tracecmd_get_traceid(struct tracecmd_input *handle); int tracecmd_get_guest_cpumap(struct tracecmd_input *handle, unsigned long long trace_id, const char **name, int *vcpu_count, const int **cpu_pid); unsigned long long tracecmd_get_first_ts(struct tracecmd_input *handle); void tracecmd_add_ts_offset(struct tracecmd_input *handle, long long offset); int tracecmd_get_tsc2nsec(struct tracecmd_input *handle, int *mult, int *shift, unsigned long long *offset); int tracecmd_buffer_instances(struct tracecmd_input *handle); const char *tracecmd_buffer_instance_name(struct tracecmd_input *handle, int indx); struct tracecmd_input *tracecmd_buffer_instance_handle(struct tracecmd_input *handle, int indx); void tracecmd_set_private(struct tracecmd_input *handle, void *data); void *tracecmd_get_private(struct tracecmd_input *handle); int tracecmd_follow_event(struct tracecmd_input *handle, const char *system, const char *event_name, int (*callback)(struct tracecmd_input *handle, struct tep_event *, struct tep_record *, int, void *), void *callback_data); int tracecmd_follow_missed_events(struct tracecmd_input *handle, int (*callback)(struct tracecmd_input *handle, struct tep_event *, struct tep_record *, int, void *), void *callback_data); int tracecmd_iterate_reset(struct tracecmd_input *handle); int tracecmd_iterate_events(struct tracecmd_input *handle, cpu_set_t *cpus, int cpu_size, int (*callback)(struct tracecmd_input *handle, struct tep_record *, int, 
void *), void *callback_data); int tracecmd_iterate_events_multi(struct tracecmd_input **handles, int nr_handles, int (*callback)(struct tracecmd_input *handle, struct tep_record *, int, void *), void *callback_data); int tracecmd_iterate_events_reverse(struct tracecmd_input *handle, cpu_set_t *cpus, int cpu_size, int (*callback)(struct tracecmd_input *handle, struct tep_record *, int, void *), void *callback_data, bool cont); void tracecmd_set_loglevel(enum tep_loglevel level); enum tracecmd_filters { TRACECMD_FILTER_NONE = TEP_ERRNO__NO_FILTER, TRACECMD_FILTER_NOT_FOUND = TEP_ERRNO__FILTER_NOT_FOUND, TRACECMD_FILTER_MISS = TEP_ERRNO__FILTER_MISS, TRACECMD_FILTER_MATCH = TEP_ERRNO__FILTER_MATCH, }; struct tracecmd_filter; struct tracecmd_filter *tracecmd_filter_add(struct tracecmd_input *handle, const char *filter_str, bool neg); struct tracecmd_cpu_map; int tracecmd_map_vcpus(struct tracecmd_input **handles, int nr_handles); struct tracecmd_cpu_map *tracecmd_get_cpu_map(struct tracecmd_input *handle, int cpu); struct tracecmd_cpu_map *tracecmd_map_find_by_host_pid(struct tracecmd_input *handle, int host_pid); struct tracecmd_input *tracecmd_map_get_guest(struct tracecmd_cpu_map *map); int tracecmd_map_get_host_pid(struct tracecmd_cpu_map *map); void tracecmd_map_set_private(struct tracecmd_cpu_map *map, void *priv); void *tracecmd_map_get_private(struct tracecmd_cpu_map *map); #ifdef __cplusplus } #endif #endif /* _TRACE_CMD_H */ trace-cmd-v3.3.1/include/version.h000066400000000000000000000002671470231550600170340ustar00rootroot00000000000000#ifndef _VERSION_H #define _VERSION_H #define VERSION(a, b) (((a) << 8) + (b)) #ifdef BUILDGUI #include "ks_version.h" #else #include "tc_version.h" #endif #endif /* _VERSION_H */ 
trace-cmd-v3.3.1/lib/000077500000000000000000000000001470231550600143145ustar00rootroot00000000000000trace-cmd-v3.3.1/lib/Documentation000077700000000000000000000000001470231550600220122../Documentationustar00rootroot00000000000000trace-cmd-v3.3.1/lib/check-manpages.sh000077700000000000000000000000001470231550600231262../check-manpages.shustar00rootroot00000000000000trace-cmd-v3.3.1/lib/meson.build000066400000000000000000000067121470231550600164640ustar00rootroot00000000000000# SPDX-License-Identifier: LGPL-2.1 # # Copyright (c) 2023 Daniel Wagner, SUSE LLC project( 'libtracecmd', ['c'], meson_version: '>= 0.50.0', license: 'GPL-2.0', version: '1.5.2', default_options: [ 'c_std=gnu99', 'buildtype=debug', 'default_library=both', 'prefix=/usr/local', 'warning_level=1']) cc = meson.get_compiler('c') prefixdir = get_option('prefix') mandir = join_paths(prefixdir, get_option('mandir')) htmldir = join_paths(prefixdir, get_option('htmldir')) libtracecmd_standalone_build = true library_version = meson.project_version() conf = configuration_data() libtraceevent_dep = dependency('libtraceevent', version: '>= 1.5.0', required: true) libtracefs_dep = dependency('libtracefs', version: '>= 1.6.0', required: true) threads_dep = dependency('threads', required: true) dl_dep = cc.find_library('dl', required : false) zlib_dep = dependency('zlib', required: false) conf.set('HAVE_ZLIB', zlib_dep.found(), description: 'Is zlib avialable?') libzstd_dep = dependency('libzstd', version: '>= 1.4.0', required: false) conf.set('HAVE_ZSTD', libzstd_dep.found(), description: 'Is libzstd available?') cunit_dep = dependency('cunit', required : false) vsock_defined = get_option('vsock') and cc.has_header('linux/vm_sockets.h') conf.set('VSOCK', vsock_defined, description: 'Is vsock available?') perf_defined = cc.has_header('linux/perf_event.h') conf.set('PERF', perf_defined, description: 'Is perf available?') have_ptrace = get_option('ptrace') and cc.compiles( ''' #include #include int main (void) 
{ int ret; ret = ptrace(PTRACE_ATTACH, 0, NULL, 0); ptrace(PTRACE_TRACEME, 0, NULL, 0); ptrace(PTRACE_GETSIGINFO, 0, NULL, NULL); ptrace(PTRACE_GETEVENTMSG, 0, NULL, NULL); ptrace(PTRACE_SETOPTIONS, NULL, NULL, PTRACE_O_TRACEFORK | PTRACE_O_TRACEVFORK | PTRACE_O_TRACECLONE | PTRACE_O_TRACEEXIT); ptrace(PTRACE_CONT, NULL, NULL, 0); ptrace(PTRACE_DETACH, 0, NULL, NULL); ptrace(PTRACE_SETOPTIONS, 0, NULL, PTRACE_O_TRACEFORK | PTRACE_O_TRACEVFORK | PTRACE_O_TRACECLONE | PTRACE_O_TRACEEXIT); return ret; } ''', name: 'ptrace') if not have_ptrace conf.set10('NO_PTRACE', true, description: 'Is ptrace missing?') conf.set('WARN_NO_PTRACE', true, description: 'Issue no ptrace warning?') endif audit_dep = dependency('audit', required: false) if not audit_dep.found() conf.set10('NO_AUDIT', true, description: 'Is audit missing?') conf.set('WARN_NO_AUDIT', true, description: 'Issue no audit warning?') endif add_project_arguments( [ '-D_GNU_SOURCE', '-include', 'trace-cmd/include/private/config.h', ], language : 'c') libtracecmd_ext_incdir = include_directories( [ '../include', '../include/trace-cmd', '../tracecmd/include' ]) subdir('trace-cmd/include') subdir('trace-cmd/include/private') subdir('trace-cmd') if libtracecmd_standalone_build subdir('Documentation/libtracecmd') custom_target( 'docs', output: 'docs', depends: [html, man], command: ['echo']) endif install_headers( '../include/trace-cmd/trace-cmd.h', subdir: 'trace-cmd') trace-cmd-v3.3.1/lib/meson_options.txt000066400000000000000000000016511470231550600177540ustar00rootroot00000000000000# -*- mode: meson -*- # SPDX-License-Identifier: LGPL-2.1 option('vsock', type : 'boolean', value : true, description : 'build with vsock support') option('ptrace', type : 'boolean', value : true, description : 'build with ptrace support') option('htmldir', type : 'string', value : 'share/doc/libtracecmd-doc', description : 'directory for HTML documentation') option('asciidoctor', type : 'boolean', value: false, description : 'use 
asciidoctor instead of asciidoc') option('docbook-xls-172', type : 'boolean', value : false, description : 'enable docbook XLS 172 workaround') option('asciidoc-no-roff', type : 'boolean', value : false, description : 'enable no roff workaround') option('man-bold-literal', type : 'boolean', value : false, description : 'enable bold literals') option('docbook-suppress-sp', type : 'boolean', value : false, description : 'docbook suppress sp') trace-cmd-v3.3.1/lib/trace-cmd/000077500000000000000000000000001470231550600161535ustar00rootroot00000000000000trace-cmd-v3.3.1/lib/trace-cmd/Makefile000066400000000000000000000063741470231550600176250ustar00rootroot00000000000000# SPDX-License-Identifier: GPL-2.0 include $(src)/scripts/utils.mk bdir:=$(obj)/lib/trace-cmd ldir:=$(src)/lib/trace-cmd DEFAULT_TARGET = $(LIBTRACECMD_STATIC) OBJS = OBJS += trace-hash.o OBJS += trace-rbtree.o OBJS += trace-hooks.o OBJS += trace-input.o OBJS += trace-output.o OBJS += trace-recorder.o OBJS += trace-util.o OBJS += trace-filter-hash.o OBJS += trace-filter.o OBJS += trace-msg.o OBJS += trace-plugin.o OBJS += trace-maps.o ifeq ($(PERF_DEFINED), 1) OBJS += trace-perf.o endif OBJS += trace-timesync.o OBJS += trace-timesync-ptp.o ifeq ($(VSOCK_DEFINED), 1) OBJS += trace-timesync-kvm.o endif OBJS += trace-compress.o ifeq ($(ZLIB_INSTALLED), 1) OBJS += trace-compress-zlib.o endif ifeq ($(ZSTD_INSTALLED), 1) OBJS += trace-compress-zstd.o endif # Additional util objects OBJS += trace-blk-hack.o OBJS += trace-ftrace.o OBJS := $(OBJS:%.o=$(bdir)/%.o) DEPS := $(OBJS:$(bdir)/%.o=$(bdir)/.%.d) all: $(DEFAULT_TARGET) $(bdir): @mkdir -p $(bdir) $(OBJS): | $(bdir) $(DEPS): | $(bdir) $(LIBTRACECMD_STATIC): $(OBJS) $(Q)$(call do_build_static_lib) LPTHREAD ?= -lpthread LIBS = $(LIBTRACEEVENT_LDLAGS) $(LIBTRACEFS_LDLAGS) $(ZLIB_LDLAGS) $(LIBZSTD_LDLAGS) $(LPTHREAD) $(LIBTRACECMD_SHARED_VERSION): $(LIBTRACECMD_SHARED) @ln -sf $( $@ $(OBJS): $(bdir)/%.o : $(bdir)/.%.d ifeq ("$(DESTDIR)", "") # If DESTDIR is not 
defined, then test if after installing the library # and running ldconfig, if the library is visible by ld.so. # If not, add the path to /etc/ld.so.conf.d/trace.conf and run ldconfig again. define install_ld_config if $(LDCONFIG); then \ if ! grep -q "^$(libdir)$$" $(LD_SO_CONF_PATH)/* ; then \ echo here;\ $(CC) -o $(bdir)/test $(ldir)/test.c -I $(includedir_SQ) \ -L $(libdir_SQ) -ltracecmd &> /dev/null; \ if ! $(bdir)/test &> /dev/null; then \ $(call print_install,trace.conf,$(LD_SO_CONF_PATH)) \ echo $(libdir_SQ) >> $(LD_SO_CONF_PATH)/trace.conf; \ $(LDCONFIG); \ fi; \ $(RM) $(bdir)/test; \ fi; \ fi endef else # If installing to a location for another machine or package, do not bother # with running ldconfig. define install_ld_config endef endif # DESTDIR = "" install_pkgconfig: $(PKG_CONFIG_FILE) $(Q)$(call do_install_pkgconfig_file,$(prefix)) install_libs: install_pkgconfig $(Q)$(call do_install,$(LIBTRACECMD_SHARED),$(libdir_SQ)) $(Q)$(call print_install,$(LIBTRACECMD_SHARED_VERSION),$(DESTDIR)$(libdir_SQ)) $(Q)cp -fpR $(LIBTRACECMD_SHARED_VERSION) $(DESTDIR)$(libdir_SQ) $(Q)$(call print_install,$(LIBTRACECMD_SHARED_SO),$(DESTDIR)$(libdir_SQ)) $(Q)cp -fpR $(LIBTRACECMD_SHARED_SO) $(DESTDIR)$(libdir_SQ) $(Q)$(call do_install,$(src)/include/trace-cmd/trace-cmd.h,$(includedir_SQ)/trace-cmd,644) $(Q)$(call install_ld_config) dep_includes := $(wildcard $(DEPS)) ifneq ($(dep_includes),) include $(dep_includes) endif clean: $(RM) $(bdir)/*.a $(bdir)/*.so $(bdir)/*.so.* $(bdir)/*.o $(bdir)/.*.d .PHONY: clean PHONY += force force: trace-cmd-v3.3.1/lib/trace-cmd/include/000077500000000000000000000000001470231550600175765ustar00rootroot00000000000000trace-cmd-v3.3.1/lib/trace-cmd/include/meson.build000066400000000000000000000002041470231550600217340ustar00rootroot00000000000000# SPDX-License-Identifier: LGPL-2.1 # # Copyright (c) 2023 Daniel Wagner, SUSE LLC libtracecmd_incdir = include_directories(['.']) 
trace-cmd-v3.3.1/lib/trace-cmd/include/private/000077500000000000000000000000001470231550600212505ustar00rootroot00000000000000trace-cmd-v3.3.1/lib/trace-cmd/include/private/meson.build000066400000000000000000000005111470231550600234070ustar00rootroot00000000000000# SPDX-License-Identifier: LGPL-2.1 # # Copyright (c) 2023 Daniel Wagner, SUSE LLC config_h = configure_file( output: 'config.h', configuration: conf ) libtracecmd_private_incdir = include_directories(['.']) config_dep = declare_dependency( include_directories : libtracecmd_private_incdir, sources: config_h) trace-cmd-v3.3.1/lib/trace-cmd/include/private/trace-cmd-private-python.h000066400000000000000000000013501470231550600262460ustar00rootroot00000000000000/* SPDX-License-Identifier: LGPL-2.1 */ /* * Private interface exposed to the python module. See python/ctracecmd.i and * python/tracecmd.py. */ #ifndef _TRACE_CMD_PRIVATE_PYTHON_H #define _TRACE_CMD_PRIVATE_PYTHON_H int tracecmd_long_size(struct tracecmd_input *handle); int tracecmd_cpus(struct tracecmd_input *handle); struct tep_record * tracecmd_read_next_data(struct tracecmd_input *handle, int *rec_cpu); struct tep_record * tracecmd_peek_data(struct tracecmd_input *handle, int cpu); static inline struct tep_record * tracecmd_peek_data_ref(struct tracecmd_input *handle, int cpu) { struct tep_record *rec = tracecmd_peek_data(handle, cpu); if (rec) rec->ref_count++; return rec; } #endif /* _TRACE_CMD_PRIVATE_PYTHON_H */ trace-cmd-v3.3.1/lib/trace-cmd/include/private/trace-cmd-private.h000066400000000000000000000575151470231550600247450ustar00rootroot00000000000000/* SPDX-License-Identifier: LGPL-2.1 */ /* * Copyright (C) 2008, 2009, 2010 Red Hat Inc, Steven Rostedt * */ #ifndef _TRACE_CMD_PRIVATE_H #define _TRACE_CMD_PRIVATE_H #include /* for iovec */ #include #include "event-parse.h" #include "trace-cmd/trace-cmd.h" #include "trace-cmd-private-python.h" #define __packed __attribute__((packed)) #define __hidden __attribute__((visibility ("hidden"))) 
#define TRACECMD_MAGIC { 23, 8, 68 }

#define ARRAY_SIZE(_a) (sizeof(_a) / sizeof((_a)[0]))
#define __weak __attribute__((weak))
#define __noreturn __attribute__((noreturn))

/*
 * Error values carried inside pointers.
 *
 * TRACECMD_ERR_MSK has every bit set except the low 14. TRACECMD_ERROR()
 * ORs that mask into @ret and hands the result back as a pointer,
 * TRACECMD_ISERR() reports any pointer whose value is above the mask, and
 * TRACECMD_PTR2ERR() clears the mask bits again, giving back the low 14
 * bits that were passed to TRACECMD_ERROR().
 *
 * Note that a @ret whose low 14 bits are all zero encodes to exactly
 * TRACECMD_ERR_MSK, which TRACECMD_ISERR() (a strict '>') does not report.
 */
#define TRACECMD_ERR_MSK	((unsigned long)(-1) & ~((1UL << 14) - 1))
#define TRACECMD_ISERR(ptr)	((unsigned long)(ptr) > TRACECMD_ERR_MSK)
#define TRACECMD_ERROR(ret)	((void *)((unsigned long)(ret) | TRACECMD_ERR_MSK))
#define TRACECMD_PTR2ERR(ptr)	((unsigned long)(ptr) & ~TRACECMD_ERR_MSK)

#define TSCNSEC_CLOCK	"tsc2nsec"

struct tep_plugin_list *trace_load_plugins(struct tep_handle *tep, int flags);

int *tracecmd_add_id(int *list, int id, int len);

#define FILE_VERSION_MIN		6
#define FILE_VERSION_MAX		7

#define FILE_VERSION_SECTIONS		7
#define FILE_VERSION_COMPRESSION	7

enum {
	RINGBUF_TYPE_PADDING		= 29,
	RINGBUF_TYPE_TIME_EXTEND	= 30,
	RINGBUF_TYPE_TIME_STAMP		= 31,
};

/* Can be overridden */
void tracecmd_debug(const char *fmt, ...);

void tracecmd_record_ref(struct tep_record *record);

void tracecmd_set_debug(bool set_debug);
bool tracecmd_get_debug(void);

void tracecmd_set_notimeout(bool set_notimeout);
bool tracecmd_get_notimeout(void);

bool tracecmd_is_version_supported(unsigned int version);
int tracecmd_default_file_version(void);

struct tracecmd_output;
struct tracecmd_recorder;
struct hook_list;

/* --- tracecmd plugins --- */

enum tracecmd_context {
	TRACECMD_INPUT,
	TRACECMD_OUTPUT,
};

/* Bit flags: each enumerator below occupies its own bit */
enum tracecmd_plugin_flag {
	TRACECMD_DISABLE_SYS_PLUGINS	= 1,
	TRACECMD_DISABLE_PLUGINS	= 1 << 1,
};

struct trace_plugin_context;

struct trace_plugin_context *
tracecmd_plugin_context_create(enum tracecmd_context context, void *data);

void tracecmd_plugin_set_flag(struct trace_plugin_context *context,
			      enum tracecmd_plugin_flag flag);

#define TRACECMD_PLUGIN_LOADER tracecmd_plugin_loader
#define TRACECMD_PLUGIN_UNLOADER tracecmd_plugin_unloader
#define TRACECMD_PLUGIN_ALIAS tracecmd_plugin_alias
/* Two-level stringification, so that the argument is macro-expanded first */
#define _MAKE_STR(x)	#x
#define MAKE_STR(x)	_MAKE_STR(x)
#define TRACECMD_PLUGIN_LOADER_NAME
MAKE_STR(TRACECMD_PLUGIN_LOADER) #define TRACECMD_PLUGIN_UNLOADER_NAME MAKE_STR(TRACECMD_PLUGIN_UNLOADER) #define TRACECMD_PLUGIN_ALIAS_NAME MAKE_STR(TRACECMD_PLUGIN_ALIAS) typedef int (*tracecmd_plugin_load_func)(struct trace_plugin_context *trace); typedef int (*tracecmd_plugin_unload_func)(struct trace_plugin_context *trace); struct tracecmd_input * tracecmd_plugin_context_input(struct trace_plugin_context *trace_context); struct tracecmd_output * tracecmd_plugin_context_output(struct trace_plugin_context *trace_context); void tracecmd_set_quiet(struct tracecmd_output *handle, bool set_quiet); bool tracecmd_get_quiet(struct tracecmd_output *handle); void tracecmd_set_out_clock(struct tracecmd_output *handle, const char *clock); const char *tracecmd_get_trace_clock(struct tracecmd_input *handle); const char *tracecmd_get_cpustats(struct tracecmd_input *handle); const char *tracecmd_get_uname(struct tracecmd_input *handle); const char *tracecmd_get_version(struct tracecmd_input *handle); off_t tracecmd_get_cpu_file_size(struct tracecmd_input *handle, int cpu); static inline int tracecmd_host_bigendian(void) { unsigned char str[] = { 0x1, 0x2, 0x3, 0x4 }; unsigned int *ptr; ptr = (unsigned int *)str; return *ptr == 0x01020304; } /* --- Opening and Reading the trace.dat file --- */ enum tracecmd_file_states { TRACECMD_FILE_ALLOCATED = 0, TRACECMD_FILE_INIT, TRACECMD_FILE_HEADERS, TRACECMD_FILE_FTRACE_EVENTS, TRACECMD_FILE_ALL_EVENTS, TRACECMD_FILE_KALLSYMS, TRACECMD_FILE_PRINTK, TRACECMD_FILE_CMD_LINES, TRACECMD_FILE_CPU_COUNT, TRACECMD_FILE_OPTIONS, TRACECMD_FILE_CPU_LATENCY, TRACECMD_FILE_CPU_FLYRECORD, }; enum { TRACECMD_OPTION_DONE, TRACECMD_OPTION_DATE, TRACECMD_OPTION_CPUSTAT, TRACECMD_OPTION_BUFFER, TRACECMD_OPTION_TRACECLOCK, TRACECMD_OPTION_UNAME, TRACECMD_OPTION_HOOK, TRACECMD_OPTION_OFFSET, TRACECMD_OPTION_CPUCOUNT, TRACECMD_OPTION_VERSION, TRACECMD_OPTION_PROCMAPS, TRACECMD_OPTION_TRACEID, TRACECMD_OPTION_TIME_SHIFT, TRACECMD_OPTION_GUEST, 
TRACECMD_OPTION_TSC2NSEC, TRACECMD_OPTION_STRINGS, TRACECMD_OPTION_HEADER_INFO, TRACECMD_OPTION_FTRACE_EVENTS, TRACECMD_OPTION_EVENT_FORMATS, TRACECMD_OPTION_KALLSYMS, TRACECMD_OPTION_PRINTK, TRACECMD_OPTION_CMDLINES, TRACECMD_OPTION_BUFFER_TEXT, TRACECMD_OPTION_MAX, }; enum { TRACECMD_FL_IGNORE_DATE = (1 << 0), TRACECMD_FL_BUFFER_INSTANCE = (1 << 1), TRACECMD_FL_IN_USECS = (1 << 2), TRACECMD_FL_RAW_TS = (1 << 3), TRACECMD_FL_SECTIONED = (1 << 4), TRACECMD_FL_COMPRESSION = (1 << 5), }; struct tracecmd_ftrace { struct tracecmd_input *handle; struct tep_event *fgraph_ret_event; int fgraph_ret_id; int long_size; }; struct tracecmd_proc_addr_map { size_t start; size_t end; char *lib_name; }; typedef void (*tracecmd_show_data_func)(struct tracecmd_input *handle, struct tep_record *record); typedef void (*tracecmd_handle_init_func)(struct tracecmd_input *handle, struct hook_list *hook, int global); struct tracecmd_input *tracecmd_alloc(const char *file, int flags); struct tracecmd_input *tracecmd_alloc_fd(int fd, int flags); void tracecmd_ref(struct tracecmd_input *handle); int tracecmd_read_headers(struct tracecmd_input *handle, enum tracecmd_file_states state); int tracecmd_get_parsing_failures(struct tracecmd_input *handle); int tracecmd_page_size(struct tracecmd_input *handle); int tracecmd_copy_headers(struct tracecmd_input *in_handle, struct tracecmd_output *out_handle, enum tracecmd_file_states start_state, enum tracecmd_file_states end_state); int tracecmd_copy_buffer_descr(struct tracecmd_input *in_handle, struct tracecmd_output *out_handle); int tracecmd_copy_options(struct tracecmd_input *in_handle, struct tracecmd_output *out_handle); int tracecmd_copy_trace_data(struct tracecmd_input *in_handle, struct tracecmd_output *out_handle); void tracecmd_set_flag(struct tracecmd_input *handle, int flag); void tracecmd_clear_flag(struct tracecmd_input *handle, int flag); unsigned long tracecmd_get_flags(struct tracecmd_input *handle); enum tracecmd_file_states 
tracecmd_get_file_state(struct tracecmd_input *handle); int tracecmd_enable_tsync(struct tracecmd_input *handle, bool enable); void tracecmd_parse_trace_clock(struct tracecmd_input *handle, char *file, int size); int tracecmd_make_pipe(struct tracecmd_input *handle, int cpu, int fd, int cpus); int tracecmd_is_buffer_instance(struct tracecmd_input *handle); void tracecmd_set_ts_offset(struct tracecmd_input *handle, long long offset); void tracecmd_set_ts2secs(struct tracecmd_input *handle, unsigned long long hz); void tracecmd_print_events(struct tracecmd_input *handle, const char *regex); struct hook_list *tracecmd_hooks(struct tracecmd_input *handle); void tracecmd_print_stats(struct tracecmd_input *handle); void tracecmd_print_uname(struct tracecmd_input *handle); void tracecmd_print_version(struct tracecmd_input *handle); int tracecmd_latency_data_read(struct tracecmd_input *handle, char **buf, size_t *size); struct tep_record * tracecmd_read_prev(struct tracecmd_input *handle, struct tep_record *record); struct tep_record * tracecmd_peek_next_data(struct tracecmd_input *handle, int *rec_cpu); struct tep_record * tracecmd_translate_data(struct tracecmd_input *handle, void *ptr, int size); struct tep_record * tracecmd_read_cpu_last(struct tracecmd_input *handle, int cpu); int tracecmd_refresh_record(struct tracecmd_input *handle, struct tep_record *record); int tracecmd_set_cpu_to_timestamp(struct tracecmd_input *handle, int cpu, unsigned long long ts); void tracecmd_set_all_cpus_to_timestamp(struct tracecmd_input *handle, unsigned long long time); int tracecmd_set_cursor(struct tracecmd_input *handle, int cpu, size_t offset); unsigned long long tracecmd_get_cursor(struct tracecmd_input *handle, int cpu); unsigned long tracecmd_get_in_file_version(struct tracecmd_input *handle); size_t tracecmd_get_options_offset(struct tracecmd_input *handle); int tracecmd_get_file_compress_proto(struct tracecmd_input *handle, const char **name, const char **version); int 
tracecmd_ftrace_overrides(struct tracecmd_input *handle, struct tracecmd_ftrace *finfo); bool tracecmd_get_use_trace_clock(struct tracecmd_input *handle); tracecmd_show_data_func tracecmd_get_show_data_func(struct tracecmd_input *handle); void tracecmd_set_show_data_func(struct tracecmd_input *handle, tracecmd_show_data_func func); int tracecmd_record_at_buffer_start(struct tracecmd_input *handle, struct tep_record *record); unsigned long long tracecmd_page_ts(struct tracecmd_input *handle, struct tep_record *record); unsigned int tracecmd_record_ts_delta(struct tracecmd_input *handle, struct tep_record *record); struct tracecmd_proc_addr_map * tracecmd_search_task_map(struct tracecmd_input *handle, int pid, unsigned long long addr); #ifndef SWIG /* hack for function graph work around */ extern __thread struct tracecmd_input *tracecmd_curr_thread_handle; #endif /* --- Creating and Writing the trace.dat file --- */ struct tracecmd_event_list { struct tracecmd_event_list *next; const char *glob; }; struct tracecmd_option; struct tracecmd_msg_handle; int tracecmd_output_set_msg(struct tracecmd_output *handle, struct tracecmd_msg_handle *msg_handle); int tracecmd_output_set_trace_dir(struct tracecmd_output *handle, const char *tracing_dir); int tracecmd_output_set_kallsyms(struct tracecmd_output *handle, const char *kallsyms); int tracecmd_output_set_from_input(struct tracecmd_output *handle, struct tracecmd_input *ihandle); int tracecmd_output_set_version(struct tracecmd_output *handle, int file_version); int tracecmd_output_set_compression(struct tracecmd_output *handle, const char *compression); int tracecmd_output_write_headers(struct tracecmd_output *handle, struct tracecmd_event_list *list); struct tracecmd_output *tracecmd_output_create(const char *output_file); struct tracecmd_output *tracecmd_output_create_fd(int fd); struct tracecmd_output *tracecmd_create_file_latency(const char *output_file, int cpus, int file_version, const char *compression); struct 
tracecmd_option *tracecmd_add_option(struct tracecmd_output *handle, unsigned short id, int size, const void *data); struct tracecmd_option * tracecmd_add_option_v(struct tracecmd_output *handle, unsigned short id, const struct iovec *vector, int count); int tracecmd_add_buffer_info(struct tracecmd_output *handle, const char *name, int cpus); int tracecmd_write_buffer_info(struct tracecmd_output *handle); int tracecmd_write_cpus(struct tracecmd_output *handle, int cpus); int tracecmd_write_cmdlines(struct tracecmd_output *handle); int tracecmd_prepare_options(struct tracecmd_output *handle, off_t offset, int whence); int tracecmd_write_options(struct tracecmd_output *handle); int tracecmd_write_meta_strings(struct tracecmd_output *handle); int tracecmd_append_options(struct tracecmd_output *handle); void tracecmd_output_close(struct tracecmd_output *handle); void tracecmd_output_flush(struct tracecmd_output *handle); void tracecmd_output_free(struct tracecmd_output *handle); struct tracecmd_output *tracecmd_copy(struct tracecmd_input *ihandle, const char *file, enum tracecmd_file_states state, int file_version, const char *compression); int tracecmd_write_cpu_data(struct tracecmd_output *handle, int cpus, char * const *cpu_data_files, const char *buff_name); int tracecmd_append_cpu_data(struct tracecmd_output *handle, int cpus, char * const *cpu_data_files); int tracecmd_append_buffer_cpu_data(struct tracecmd_output *handle, const char *name, int cpus, char * const *cpu_data_files); struct tracecmd_output *tracecmd_get_output_handle_fd(int fd); unsigned long tracecmd_get_out_file_version(struct tracecmd_output *handle); size_t tracecmd_get_out_file_offset(struct tracecmd_output *handle); /* --- Reading the Fly Recorder Trace --- */ enum { TRACECMD_RECORD_NOSPLICE = (1 << 0), /* Use read instead of splice */ TRACECMD_RECORD_SNAPSHOT = (1 << 1), /* Extract from snapshot */ TRACECMD_RECORD_BLOCK_SPLICE = (1 << 2), /* Block on splice write */ TRACECMD_RECORD_NOBRASS = 
(1 << 3), /* Splice directly without a brass pipe */ TRACECMD_RECORD_POLL = (1 << 4), /* Use O_NONBLOCK, poll trace buffers */ }; void tracecmd_free_recorder(struct tracecmd_recorder *recorder); struct tracecmd_recorder *tracecmd_create_recorder(const char *file, int cpu, unsigned flags); struct tracecmd_recorder *tracecmd_create_recorder_fd(int fd, int cpu, unsigned flags); struct tracecmd_recorder *tracecmd_create_recorder_virt(const char *file, int cpu, unsigned flags, int trace_fd, int maxkb); struct tracecmd_recorder *tracecmd_create_recorder_maxkb(const char *file, int cpu, unsigned flags, int maxkb); struct tracecmd_recorder *tracecmd_create_buffer_recorder_fd(int fd, int cpu, unsigned flags, struct tracefs_instance *instance); struct tracecmd_recorder *tracecmd_create_buffer_recorder(const char *file, int cpu, unsigned flags, struct tracefs_instance *instance); struct tracecmd_recorder *tracecmd_create_buffer_recorder_maxkb(const char *file, int cpu, unsigned flags, struct tracefs_instance *instance, int maxkb); int tracecmd_start_recording(struct tracecmd_recorder *recorder, unsigned long sleep); int tracecmd_stop_recording(struct tracecmd_recorder *recorder); long tracecmd_flush_recording(struct tracecmd_recorder *recorder, bool finish); enum tracecmd_msg_flags { TRACECMD_MSG_FL_USE_TCP = 1 << 0, TRACECMD_MSG_FL_USE_VSOCK = 1 << 1, TRACECMD_MSG_FL_PROXY = 1 << 2, }; #define MSG_CACHE_FILE "/tmp/trace_msg_cacheXXXXXX" /* for both client and server */ struct tracecmd_msg_handle { int fd; short cpu_count; short version; /* Current protocol version */ unsigned long flags; off_t cache_start_offset; bool done; bool cache; int cfd; #ifndef HAVE_MEMFD_CREATE char cfile[sizeof(MSG_CACHE_FILE)]; #endif }; struct tracecmd_tsync_protos { char **names; }; struct tracecmd_msg_handle * tracecmd_msg_handle_alloc(int fd, unsigned long flags); int tracecmd_msg_handle_cache(struct tracecmd_msg_handle *msg_handle); /* Closes the socket and frees the handle */ void 
tracecmd_msg_handle_close(struct tracecmd_msg_handle *msg_handle); /* for clients */ int tracecmd_msg_send_init_data(struct tracecmd_msg_handle *msg_handle, unsigned int **client_ports); int tracecmd_msg_data_send(struct tracecmd_msg_handle *msg_handle, const char *buf, int size); int tracecmd_msg_finish_sending_data(struct tracecmd_msg_handle *msg_handle); int tracecmd_msg_flush_data(struct tracecmd_msg_handle *msg_handle); int tracecmd_msg_send_close_msg(struct tracecmd_msg_handle *msg_handle); int tracecmd_msg_send_close_resp_msg(struct tracecmd_msg_handle *msg_handle); int tracecmd_msg_wait_close(struct tracecmd_msg_handle *msg_handle); int tracecmd_msg_wait_close_resp(struct tracecmd_msg_handle *msg_handle); int tracecmd_msg_cont(struct tracecmd_msg_handle *msg_handle); int tracecmd_msg_wait(struct tracecmd_msg_handle *msg_handle); /* for server */ int tracecmd_msg_initial_setting(struct tracecmd_msg_handle *msg_handle); int tracecmd_msg_send_port_array(struct tracecmd_msg_handle *msg_handle, unsigned *ports); int tracecmd_msg_read_data(struct tracecmd_msg_handle *msg_handle, int ofd); int tracecmd_msg_collect_data(struct tracecmd_msg_handle *msg_handle, int ofd); bool tracecmd_msg_done(struct tracecmd_msg_handle *msg_handle); void tracecmd_msg_set_done(struct tracecmd_msg_handle *msg_handle); int tracecmd_msg_read_options(struct tracecmd_msg_handle *msg_handle, struct tracecmd_output *handle); int tracecmd_msg_send_options(struct tracecmd_msg_handle *msg_handle, struct tracecmd_output *handle); int tracecmd_msg_send_trace_req(struct tracecmd_msg_handle *msg_handle, int argc, char **argv, bool use_fifos, unsigned long long trace_id, struct tracecmd_tsync_protos *protos); int tracecmd_msg_send_trace_proxy(struct tracecmd_msg_handle *msg_handle, int argc, char **argv, bool use_fifos, unsigned long long trace_id, struct tracecmd_tsync_protos *protos, unsigned int nr_cpus, unsigned int siblings); int tracecmd_msg_recv_trace_req(struct tracecmd_msg_handle 
*msg_handle, int *argc, char ***argv, bool *use_fifos, unsigned long long *trace_id, struct tracecmd_tsync_protos **protos); int tracecmd_msg_recv_trace_proxy(struct tracecmd_msg_handle *msg_handle, int *argc, char ***argv, bool *use_fifos, unsigned long long *trace_id, struct tracecmd_tsync_protos **protos, unsigned int *cpus, unsigned int *siblings); int tracecmd_msg_send_trace_resp(struct tracecmd_msg_handle *msg_handle, int nr_cpus, int page_size, unsigned int *ports, bool use_fifos, unsigned long long trace_id, const char *tsync_proto, unsigned int tsync_port); int tracecmd_msg_recv_trace_resp(struct tracecmd_msg_handle *msg_handle, int *nr_cpus, int *page_size, unsigned int **ports, bool *use_fifos, unsigned long long *trace_id, char **tsync_proto, unsigned int *tsync_port); int tracecmd_msg_send_time_sync(struct tracecmd_msg_handle *msg_handle, char *sync_protocol, unsigned int sync_msg_id, unsigned int payload_size, char *payload); int tracecmd_msg_recv_time_sync(struct tracecmd_msg_handle *msg_handle, char *sync_protocol, unsigned int *sync_msg_id, unsigned int *payload_size, char **payload); enum tracecmd_clocks { TRACECMD_CLOCK_UNKNOWN = 0, TRACECMD_CLOCK_LOCAL = 1, TRACECMD_CLOCK_GLOBAL = 1 << 1, TRACECMD_CLOCK_COUNTER = 1 << 2, TRACECMD_CLOCK_UPTIME = 1 << 3, TRACECMD_CLOCK_PERF = 1 << 4, TRACECMD_CLOCK_MONO = 1 << 5, TRACECMD_CLOCK_MONO_RAW = 1 << 6, TRACECMD_CLOCK_BOOT = 1 << 7, TRACECMD_CLOCK_X86_TSC = 1 << 8 }; enum tracecmd_clocks tracecmd_clock_str2id(const char *clock); const char *tracecmd_clock_id2str(enum tracecmd_clocks clock); /* --- Timestamp synchronization --- */ struct tracecmd_time_sync; #define TRACECMD_TSYNC_PNAME_LENGTH 16 #define TRACECMD_TSYNC_PROTO_NONE "none" enum{ TRACECMD_TIME_SYNC_CMD_PROBE = 1, TRACECMD_TIME_SYNC_CMD_STOP = 2, }; enum tracecmd_time_sync_role { TRACECMD_TIME_SYNC_ROLE_HOST = (1 << 0), TRACECMD_TIME_SYNC_ROLE_GUEST = (1 << 1), TRACECMD_TIME_SYNC_ROLE_CLIENT = (1 << 2), TRACECMD_TIME_SYNC_ROLE_SERVER = (1 << 
3), }; /* Timestamp synchronization flags */ #define TRACECMD_TSYNC_FLAG_INTERPOLATE 0x1 void tracecmd_tsync_init(void); int tracecmd_tsync_proto_getall(struct tracecmd_tsync_protos **protos, const char *clock, int role); bool tsync_proto_is_supported(const char *proto_name); struct tracecmd_time_sync * tracecmd_tsync_with_host(int fd, const char *proto, const char *clock, int remote_id, int local_id); int tracecmd_tsync_with_host_stop(struct tracecmd_time_sync *tsync); struct tracecmd_time_sync * tracecmd_tsync_with_guest(unsigned long long trace_id, int loop_interval, unsigned int fd, int guest_pid, int guest_cpus, const char *proto_name, const char *clock); int tracecmd_tsync_with_guest_stop(struct tracecmd_time_sync *tsync); int tracecmd_tsync_get_offsets(struct tracecmd_time_sync *tsync, int cpu, int *count, long long **ts, long long **offsets, long long **scalings, long long **frac); const char *tracecmd_tsync_get_proto(const struct tracecmd_tsync_protos *protos, const char *clock, enum tracecmd_time_sync_role role); void tracecmd_tsync_free(struct tracecmd_time_sync *tsync); int tracecmd_write_guest_time_shift(struct tracecmd_output *handle, struct tracecmd_time_sync *tsync); /* --- Compression --- */ struct tracecmd_compress_chunk { unsigned int size; unsigned int zsize; off_t zoffset; off_t offset; }; struct tracecmd_compression; struct tracecmd_compression_proto { int weight; const char *name; const char *version; int (*compress)(void *ctx, const void *in, int in_bytes, void *out, int out_bytes); int (*uncompress)(void *ctx, const void *in, int in_bytes, void *out, int out_bytes); unsigned int (*compress_size)(void *ctx, unsigned int bytes); bool (*is_supported)(const char *name, const char *version); void *(*new_context)(void); void (*free_context)(void *ctx); }; struct tracecmd_compression *tracecmd_compress_alloc(const char *name, const char *version, int fd, struct tep_handle *tep, struct tracecmd_msg_handle *msg_handle); void 
tracecmd_compress_destroy(struct tracecmd_compression *handle); int tracecmd_compress_block(struct tracecmd_compression *handle); int tracecmd_uncompress_block(struct tracecmd_compression *handle); void tracecmd_compress_reset(struct tracecmd_compression *handle); ssize_t tracecmd_compress_buffer_read(struct tracecmd_compression *handle, char *dst, size_t len); ssize_t tracecmd_compress_pread(struct tracecmd_compression *handle, char *dst, size_t len, off_t offset); int tracecmd_compress_buffer_write(struct tracecmd_compression *handle, const void *data, size_t size); off_t tracecmd_compress_lseek(struct tracecmd_compression *handle, off_t offset, int whence); int tracecmd_compress_proto_get_name(struct tracecmd_compression *compress, const char **name, const char **version); bool tracecmd_compress_is_supported(const char *name, const char *version); int tracecmd_compress_protos_get(char ***names, char ***versions); int tracecmd_compress_proto_register(struct tracecmd_compression_proto *proto); int tracecmd_compress_copy_from(struct tracecmd_compression *handle, int fd, int chunk_size, size_t *read_size, size_t *write_size); int tracecmd_uncompress_copy_to(struct tracecmd_compression *handle, int fd, size_t *read_size, size_t *write_size); int tracecmd_uncompress_chunk(struct tracecmd_compression *handle, struct tracecmd_compress_chunk *chunk, char *data); int tracecmd_load_chunks_info(struct tracecmd_compression *handle, struct tracecmd_compress_chunk **chunks_info); /* --- Plugin handling --- */ extern struct tep_plugin_option trace_ftrace_options[]; char **trace_util_find_plugin_files(const char *suffix); void trace_util_free_plugin_files(char **files); /* Used for trace-cmd list */ void tracecmd_ftrace_load_options(void); /* event hooks */ struct hook_list { struct hook_list *next; struct buffer_instance *instance; const char *hook; char *str; char *start_system; char *start_event; char *start_match; char *end_system; char *end_event; char *end_match; char 
*pid; int migrate; int global; int stack; }; struct hook_list *tracecmd_create_event_hook(const char *arg); void tracecmd_free_hooks(struct hook_list *hooks); void tracecmd_plog(const char *fmt, ...); void tracecmd_plog_error(const char *fmt, ...); int tracecmd_set_logfile(char *logfile); /* --- System --- */ unsigned long long tracecmd_generate_traceid(void); int tracecmd_count_cpus(void); /* --- Hack! --- */ int tracecmd_blk_hack(struct tracecmd_input *handle); /* --- Stack tracer functions --- */ int tracecmd_stack_tracer_status(int *status); /* --- Debugging --- */ struct kbuffer *tracecmd_record_kbuf(struct tracecmd_input *handle, struct tep_record *record); void *tracecmd_record_page(struct tracecmd_input *handle, struct tep_record *record); void *tracecmd_record_offset(struct tracecmd_input *handle, struct tep_record *record); #ifdef PERF #include /* trace-cmd Perf */ struct trace_perf { int fd; int cpu; int pid; int pages; struct perf_event_attr pe; struct perf_event_mmap_page *mmap; }; int trace_perf_init(struct trace_perf *perf, int pages, int cpu, int pid); int trace_perf_open(struct trace_perf *perf); void trace_perf_close(struct trace_perf *perf); #endif #endif /* _TRACE_CMD_PRIVATE_H */ trace-cmd-v3.3.1/lib/trace-cmd/include/private/trace-filter-hash.h000066400000000000000000000040341470231550600247240ustar00rootroot00000000000000/* SPDX-License-Identifier: LGPL-2.1 */ /* * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt * Copyright (C) 2018 VMware Inc, Steven Rostedt * */ #ifndef _TRACE_FILTER_HASH_H #define _TRACE_FILTER_HASH_H #include struct tracecmd_filter_id_item { struct tracecmd_filter_id_item *next; int id; }; struct tracecmd_filter_id { struct tracecmd_filter_id_item **hash; int count; }; /** * tracecmd_quick_hash - A quick (non secured) hash alogirthm * @val: The value to perform the hash on * @bits: The size in bits you need to return * * This is a quick hashing function adapted from Donald E. Knuth's 32 * bit multiplicative hash. 
See The Art of Computer Programming (TAOCP). * Multiplication by the Prime number, closest to the golden ratio of * 2^32. * * @bits is used to max the result for use cases that require * a power of 2 return value that is less than 32 bits. Any value * of @bits greater than 31 (or zero), will simply return the full hash on @val. */ static inline uint32_t tracecmd_quick_hash(uint32_t val, unsigned int bits) { val *= UINT32_C(2654435761); if (!bits || bits > 31) return val; return val & ((1 << bits) - 1); } struct tracecmd_filter_id_item * tracecmd_filter_id_find(struct tracecmd_filter_id *hash, int id); void tracecmd_filter_id_add(struct tracecmd_filter_id *hash, int id); void tracecmd_filter_id_remove(struct tracecmd_filter_id *hash, int id); void tracecmd_filter_id_clear(struct tracecmd_filter_id *hash); struct tracecmd_filter_id *tracecmd_filter_id_hash_alloc(void); void tracecmd_filter_id_hash_free(struct tracecmd_filter_id *hash); struct tracecmd_filter_id * tracecmd_filter_id_hash_copy(struct tracecmd_filter_id *hash); int *tracecmd_filter_ids(struct tracecmd_filter_id *hash); int tracecmd_filter_id_compare(struct tracecmd_filter_id *hash1, struct tracecmd_filter_id *hash2); static inline int tracecmd_filter_task_count(struct tracecmd_filter_id *hash) { return hash->count; } #endif /* _TRACE_FILTER_HASH_H */ trace-cmd-v3.3.1/lib/trace-cmd/include/private/trace-hash.h000066400000000000000000000027711470231550600234470ustar00rootroot00000000000000/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2014 Red Hat Inc, Steven Rostedt * */ #ifndef _TRACE_HASH_H #define _TRACE_HASH_H struct trace_hash_item { struct trace_hash_item *next; struct trace_hash_item *prev; unsigned long long key; }; struct trace_hash { struct trace_hash_item **buckets; int nr_buckets; int power; }; int trace_hash_init(struct trace_hash *hash, int buckets); void trace_hash_free(struct trace_hash *hash); int trace_hash_add(struct trace_hash *hash, struct trace_hash_item *item); int 
trace_hash_empty(struct trace_hash *hash); static inline void trace_hash_del(struct trace_hash_item *item) { struct trace_hash_item *prev = item->prev; prev->next = item->next; if (item->next) item->next->prev = prev; } #define trace_hash_for_each_bucket(bucket, hash) \ for (bucket = (hash)->buckets; \ (bucket) < (hash)->buckets + (hash)->nr_buckets; (bucket)++) #define trace_hash_for_each_item(item, bucket) \ for ((item = *(bucket)); item; item = (item)->next) #define trace_hash_for_each_item_safe(item, n, bucket) \ for ((item = *(bucket)), n = item ? item->next : NULL; item; \ item = n, n = item ? (item)->next : NULL) #define trace_hash_while_item(item, bucket) \ while ((item = *(bucket))) typedef int (*trace_hash_func)(struct trace_hash_item *item, void *data); struct trace_hash_item * trace_hash_find(struct trace_hash *hash, unsigned long long key, trace_hash_func match, void *data); #endif /* _TRACE_HASH_H */ trace-cmd-v3.3.1/lib/trace-cmd/include/private/trace-msg.h000066400000000000000000000004531470231550600233050ustar00rootroot00000000000000/* SPDX-License-Identifier: LGPL-2.1 */ #ifndef _TRACE_MSG_H_ #define _TRACE_MSG_H_ #include #define UDP_MAX_PACKET (65536 - 20) #define V3_MAGIC "766679\0" #define V3_CPU "-1V3" #define V1_PROTOCOL 1 #define V3_PROTOCOL 3 extern unsigned int page_size; #endif /* _TRACE_MSG_H_ */ trace-cmd-v3.3.1/lib/trace-cmd/include/private/trace-rbtree.h000066400000000000000000000021721470231550600240020ustar00rootroot00000000000000/* SPDX-License-Identifier: LGPL-2.1 */ /* * Copyright (C) 2023 Google, Steven Rostedt * */ #ifndef _TRACE_RBTREE_H #define _TRACE_RBTREE_H struct trace_rbtree_node { struct trace_rbtree_node *parent; struct trace_rbtree_node *left; struct trace_rbtree_node *right; int color; }; typedef int (*trace_rbtree_cmp_fn)(const struct trace_rbtree_node *A, const struct trace_rbtree_node *B); typedef int (*trace_rbtree_search_fn)(const struct trace_rbtree_node *n, const void *data); struct trace_rbtree { struct 
trace_rbtree_node *node; trace_rbtree_search_fn search; trace_rbtree_cmp_fn cmp; size_t nr_nodes; }; void trace_rbtree_init(struct trace_rbtree *tree, trace_rbtree_cmp_fn cmp_fn, trace_rbtree_search_fn search_fn); struct trace_rbtree_node *trace_rbtree_find(struct trace_rbtree *tree, const void *data); void trace_rbtree_delete(struct trace_rbtree *tree, struct trace_rbtree_node *node); int trace_rbtree_insert(struct trace_rbtree *tree, struct trace_rbtree_node *node); struct trace_rbtree_node *trace_rbtree_pop_nobalance(struct trace_rbtree *tree); #endif /* _TRACE_RBTREE_H */ trace-cmd-v3.3.1/lib/trace-cmd/include/trace-cmd-local.h000066400000000000000000000077121470231550600227050ustar00rootroot00000000000000/* SPDX-License-Identifier: LGPL-2.1 */ /* * Copyright (C) 2010 Red Hat Inc, Steven Rostedt * */ #ifndef _TRACE_CMD_LOCAL_H #define _TRACE_CMD_LOCAL_H #include #include "trace-cmd-private.h" #define FILE_VERSION_DEFAULT 7 /* Can be overridden */ void tracecmd_warning(const char *fmt, ...); void tracecmd_critical(const char *fmt, ...); void tracecmd_info(const char *fmt, ...); #ifndef htonll # if __BYTE_ORDER == __LITTLE_ENDIAN #define htonll(x) __bswap_64(x) #define ntohll(x) __bswap_64(x) #else #define htonll(x) (x) #define ntohll(x) (x) #endif #endif #ifdef HAVE_ZLIB int tracecmd_zlib_init(void); #endif #ifdef HAVE_ZSTD int tracecmd_zstd_init(void); #else static inline int tracecmd_zstd_init(void) { return 0; } #endif struct data_file_write { unsigned long long file_size; unsigned long long write_size; /* offset in the trace file, where write_size is stored */ unsigned long long file_write_size; unsigned long long data_offset; /* offset in the trace file, where data_offset is stored */ unsigned long long file_data_offset; }; enum tracecmd_filters tracecmd_filter_match(struct tracecmd_filter *filter, struct tep_record *record); void trace_set_guest_map(struct tracecmd_input *handle, struct tracecmd_cpu_map *map); struct tracecmd_cpu_map 
*trace_get_guest_map(struct tracecmd_input *handle); void trace_set_guest_map_cnt(struct tracecmd_input *handle, int count); int trace_get_guest_map_cnt(struct tracecmd_input *handle); void trace_guest_map_free(struct tracecmd_cpu_map *map); void tracecmd_compress_init(void); void tracecmd_compress_free(void); bool check_file_state(unsigned long file_version, int current_state, int new_state); bool check_out_state(struct tracecmd_output *handle, int new_state); int out_uncompress_block(struct tracecmd_output *handle); int out_compression_start(struct tracecmd_output *handle, bool compress); int out_compression_end(struct tracecmd_output *handle, bool compress); void out_compression_reset(struct tracecmd_output *handle, bool compress); bool out_check_compression(struct tracecmd_output *handle); void out_set_file_state(struct tracecmd_output *handle, int new_state); int out_save_options_offset(struct tracecmd_output *handle, unsigned long long start); unsigned long long out_copy_fd_compress(struct tracecmd_output *handle, int fd, unsigned long long max, unsigned long long *write_size, int page); void in_uncompress_reset(struct tracecmd_input *handle); int in_uncompress_block(struct tracecmd_input *handle); unsigned long long out_write_section_header(struct tracecmd_output *handle, unsigned short header_id, char *description, int flags, bool option); int out_update_section_header(struct tracecmd_output *handle, unsigned long long offset); long long do_write_check(struct tracecmd_output *handle, const void *data, long long size); struct tracecmd_option * out_add_buffer_option(struct tracecmd_output *handle, const char *name, unsigned short id, unsigned long long data_offset, int cpus, struct data_file_write *cpu_data, int page_size); struct cpu_data_source { int fd; ssize_t size; off_t offset; }; int out_write_cpu_data(struct tracecmd_output *handle, int cpus, struct cpu_data_source *data, const char *buff_name); int out_write_emty_cpu_data(struct tracecmd_output 
*handle, int cpus); off_t msg_lseek(struct tracecmd_msg_handle *msg_handle, off_t offset, int whence); unsigned long long get_last_option_offset(struct tracecmd_input *handle); unsigned int get_meta_strings_size(struct tracecmd_input *handle); int trace_append_options(struct tracecmd_output *handle, void *buf, size_t len); void *trace_get_options(struct tracecmd_output *handle, size_t *len); /* filters */ struct tracecmd_filter *tracecmd_filter_get(struct tracecmd_input *handle); void tracecmd_filter_set(struct tracecmd_input *handle, struct tracecmd_filter *filter); void tracecmd_filter_free(struct tracecmd_filter *filter); #endif /* _TRACE_CMD_LOCAL_H */ trace-cmd-v3.3.1/lib/trace-cmd/include/trace-hash-local.h000066400000000000000000000017371470231550600230660ustar00rootroot00000000000000/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2009, Steven Rostedt * */ #ifndef _TRACE_HASH_LOCAL_H #define _TRACE_HASH_LOCAL_H static inline unsigned int trace_hash(unsigned int val) { unsigned int hash, tmp; hash = 12546869; /* random prime */ /* * The following hash is based off of Paul Hsieh's super fast hash: * http://www.azillionmonkeys.com/qed/hash.html * Note, he released this code unde the GPL 2.0 license, which * is the same as the license for the programs that use it here. 
*/ hash += (val & 0xffff); tmp = (val >> 16) ^ hash; hash = (hash << 16) ^ tmp; hash += hash >> 11; hash ^= hash << 3; hash += hash >> 5; hash ^= hash << 4; hash += hash >> 17; hash ^= hash << 25; hash += hash >> 6; return hash; } static inline unsigned int trace_hash_str(char *str) { int val = 0; int i; for (i = 0; str[i]; i++) val += ((int)str[i]) << (i & 0xf); return trace_hash(val); } #endif /* _TRACE_HASH_LOCAL_H */ trace-cmd-v3.3.1/lib/trace-cmd/include/trace-tsync-local.h000066400000000000000000000042401470231550600232730ustar00rootroot00000000000000/* SPDX-License-Identifier: LGPL-2.1 */ /* * Copyright (C) 2019, VMware, Tzvetomir Stoyanov * */ #ifndef _TRACE_TSYNC_LOCAL_H #define _TRACE_TSYNC_LOCAL_H #include struct tsync_proto; struct tracecmd_time_sync { pthread_t thread; bool thread_running; unsigned long long trace_id; char *proto_name; int loop_interval; pthread_mutex_t lock; pthread_cond_t cond; pthread_barrier_t first_sync; char *clock_str; struct tracecmd_msg_handle *msg_handle; struct tsync_proto *proto; void *context; int guest_pid; int vcpu_count; int remote_id; int local_id; }; struct clock_sync_offsets { /* Arrays with calculated time offsets at given time */ int sync_size; /* Allocated size of sync_ts, * sync_offsets, sync_scalings and sync_frac */ int sync_count; /* Number of elements in sync_ts, * sync_offsets, sync_scalings and sync_frac */ long long *sync_ts; long long *sync_offsets; long long *sync_scalings; long long *sync_frac; }; struct clock_sync_context { void *proto_data; /* time sync protocol specific data */ bool is_server; /* server side time sync role */ bool is_guest; /* guest or host time sync role */ struct tracefs_instance *instance; /* ftrace buffer, used for time sync events */ int cpu_count; struct clock_sync_offsets *offsets; /* Array of size cpu_count * calculated offsets per CPU */ /* Identifiers of local and remote time sync peers */ unsigned int local_id; unsigned int remote_id; }; int 
tracecmd_tsync_proto_register(const char *proto_name, int accuracy, int roles, int supported_clocks, unsigned int flags, int (*init)(struct tracecmd_time_sync *), int (*free)(struct tracecmd_time_sync *), int (*calc)(struct tracecmd_time_sync *, long long *, long long *, long long*, long long *, unsigned int)); int tracecmd_tsync_proto_unregister(char *proto_name); int ptp_clock_sync_register(void); #ifdef VSOCK int kvm_clock_sync_register(void); #else static inline int kvm_clock_sync_register(void) { return 0; } #endif #endif /* _TRACE_TSYNC_LOCAL_H */ trace-cmd-v3.3.1/lib/trace-cmd/include/trace-write-local.h000066400000000000000000000013361470231550600232700ustar00rootroot00000000000000/* SPDX-License-Identifier: LGPL-2.1 */ /* * Copyright (C) 2010 Red Hat Inc, Steven Rostedt * */ #ifndef _TRACE_WRITE_LOCAL_H #define _TRACE_WRITE_LOCAL_H /* Local for trace-input.c, trace-output.c and trace-msg.c */ static inline ssize_t __do_write(int fd, const void *data, size_t size) { ssize_t tot = 0; ssize_t w; do { w = write(fd, data + tot, size - tot); tot += w; if (!w) break; if (w < 0) return w; } while (tot != size); return tot; } static inline ssize_t __do_write_check(int fd, const void *data, size_t size) { ssize_t ret; ret = __do_write(fd, data, size); if (ret < 0) return ret; if (ret != size) return -1; return 0; } #endif /* _TRACE_WRITE_LOCAL_H */ trace-cmd-v3.3.1/lib/trace-cmd/meson.build000066400000000000000000000042771470231550600203270ustar00rootroot00000000000000# SPDX-License-Identifier: LGPL-2.1 # # Copyright (c) 2023 Daniel Wagner, SUSE LLC sources = [ 'trace-hash.c', 'trace-rbtree.c', 'trace-hooks.c', 'trace-input.c', 'trace-output.c', 'trace-recorder.c', 'trace-util.c', 'trace-filter-hash.c', 'trace-filter.c', 'trace-msg.c', 'trace-plugin.c', 'trace-maps.c', 'trace-timesync.c', 'trace-timesync-ptp.c', 'trace-compress.c', 'trace-blk-hack.c', 'trace-ftrace.c', ] if perf_defined sources += 'trace-perf.c' endif if vsock_defined sources += 
'trace-timesync-kvm.c' endif if zlib_dep.found() sources += 'trace-compress-zlib.c' endif if libzstd_dep.found() sources += 'trace-compress-zstd.c' endif if libtracecmd_standalone_build libtracecmd = library( 'tracecmd', sources, version: library_version, dependencies: [ libtraceevent_dep, libtracefs_dep, threads_dep, dl_dep, zlib_dep, libzstd_dep, audit_dep], include_directories: [ libtracecmd_incdir, libtracecmd_private_incdir, libtracecmd_ext_incdir], install: true) pkg = import('pkgconfig') pkg.generate( libtracecmd, subdirs: 'trace-cmd', libraries: [ libtracefs_dep, libtraceevent_dep], filebase: meson.project_name(), name: meson.project_name(), version: meson.project_version(), description: 'Library for creating and reading trace-cmd data files', url: 'https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/') libtracecmd_dep = declare_dependency( include_directories: ['.'], link_with: libtracecmd) else static_libtracecmd = static_library( 'tracecmd', sources, dependencies: [ libtraceevent_dep, libtracefs_dep, threads_dep, dl_dep, zlib_dep, libzstd_dep, audit_dep], include_directories: [ libtracecmd_incdir, libtracecmd_private_incdir, libtracecmd_ext_incdir], install: false) endif trace-cmd-v3.3.1/lib/trace-cmd/plugins/000077500000000000000000000000001470231550600176345ustar00rootroot00000000000000trace-cmd-v3.3.1/lib/trace-cmd/plugins/Makefile000066400000000000000000000025131470231550600212750ustar00rootroot00000000000000include $(src)/scripts/utils.mk bdir:=$(obj)/lib/trace-cmd/plugins PLUGIN_OBJS = PLUGIN_OBJS := $(PLUGIN_OBJS:%.o=$(bdir)/%.o) PLUGIN_BUILD := $(PLUGIN_OBJS:$(bdir)/%.o=$(bdir)/%.so) PLUGINS := $(PLUGIN_BUILD) DEPS := $(PLUGIN_OBJS:$(bdir)/%.o=$(bdir)/.%.d) all: $(PLUGINS) $(bdir): @mkdir -p $(bdir) $(PLUGIN_OBJS): | $(bdir) $(DEPS): | $(bdir) $(PLUGIN_OBJS): $(bdir)/%.o : %.c $(Q)$(do_compile_plugin_obj) $(PLUGIN_BUILD): $(bdir)/%.so: $(bdir)/%.o $(Q)$(do_plugin_build) $(DEPS): $(bdir)/.%.d: %.c $(Q)$(CC) -M -MT $(bdir)/$*.o $(CPPFLAGS) 
$(CFLAGS) $< > $@ $(PLUGIN_OBJS): $(bdir)/%.o : $(bdir)/.%.d PLUGINS_INSTALL = $(subst .so,.install,$(PLUGINS)) $(PLUGINS_INSTALL): $(bdir)/%.install : $(bdir)/%.so force $(Q)$(call do_install_data,$<,$(plugin_tracecmd_dir_SQ)) install_plugins: $(PLUGINS_INSTALL) # The following targets are necessary to trigger a rebuild when # $(PLUGIN_DIR_TRACECMD) change. Without them, a full clean build would # necessary in order to get the binaries updated. $(bdir)/tracecmd_plugin_dir: $(bdir) force $(Q)$(N)$(call update_dir, 'PLUGIN_DIR_TRACECMD=$(PLUGIN_DIR_TRACECMD)') dep_includes := $(wildcard $(DEPS)) ifneq ($(dep_includes),) include $(dep_includes) endif clean: $(RM) -f $(bdir)/*.a $(bdir)/*.so $(bdir)/*.o $(bdir)/.*.d\ $(bdir)/tracecmd_plugin_dir force: .PHONY: clean force trace-cmd-v3.3.1/lib/trace-cmd/test.c000066400000000000000000000001411470231550600172720ustar00rootroot00000000000000#include int main() { tracecmd_open_head("trace.dat", 0); return 0; } trace-cmd-v3.3.1/lib/trace-cmd/trace-blk-hack.c000066400000000000000000000077571470231550600211070ustar00rootroot00000000000000// SPDX-License-Identifier: LGPL-2.1 /* * Copyright (C) 2009 Red Hat Inc, Steven Rostedt * */ #include #include "trace-cmd.h" #include "trace-local.h" static const char blk_event_start[] = "name: blktrace\n" "ID: %d\n" "format:\n" "\tfield:unsigned short common_type;\toffset:0;\tsize:2;\n" "\tfield:unsigned char common_flags;\toffset:2;\tsize:1;\n" "\tfield:unsigned char common_preempt_count;\toffset:3;\tsize:1;\n" "\tfield:int common_pid;\toffset:4;\tsize:4;\n"; static const char blk_body[] = "\n" "\tfield:u64 sector;\toffset:16;\tsize:8;\n" "\tfield:int bytes;\toffset:24;\tsize:4;\n" "\tfield:int action;\toffset:28;\tsize:4;\n" "\tfield:int pid;\toffset:32;\tsize:4;\n" "\tfield:int device;\toffset:36;\tsize:4;\n" "\tfield:int cpu;\toffset:40;\tsize:4;\n" "\tfield:short error;\toffset:44;\tsize:2;\n" "\tfield:short pdu_len;\toffset:46;\tsize:2;\n" "\tfield:void data;\toffset:48;\tsize:0;\n" 
"\n" "print fmt: \"%%d\", REC->pid\n"; int tracecmd_blk_hack(struct tracecmd_input *handle) { struct tep_handle *pevent; struct tep_event *event; struct tep_format_field *field; char buf[4096]; /* way more than enough! */ int id; int l; int r; pevent = tracecmd_get_tep(handle); /* * Unfortunately, the TRACE_BLK has changed a bit. * We need to test if various events exist to try * to guess what event id TRACE_BLK would be. */ /* It was originally behind the "power" event */ event = tep_find_event_by_name(pevent, "ftrace", "power"); if (event) { id = event->id + 1; goto found; } /* * But the power tracer is now in perf. * Then it was after kmem_free */ event = tep_find_event_by_name(pevent, "ftrace", "kmem_free"); if (event) { id = event->id + 1; goto found; } /* * But that then went away. * Currently it should be behind the user stack. */ event = tep_find_event_by_name(pevent, "ftrace", "user_stack"); if (event) { id = event->id + 1; goto found; } /* Give up :( */ return -1; found: /* * Blk events are not exported in the events directory. * This is a hack to attempt to create a block event * that we can read. 
* * We'll make a format file to look like this: * * name: blktrace * ID: 13 * format: * field:unsigned short common_type; offset:0; size:2; * field:unsigned char common_flags; offset:2; size:1; * field:unsigned char common_preempt_count; offset:3; size:1; * field:int common_pid; offset:4; size:4; * field:int common_lock_depth; offset:8; size:4; * * field:u64 sector; offset:16; size:8; * field:int bytes; offset:32; size:4; * field:int action; offset:36; size:4; * field:int pid; offset:40; size:4; * field:int device; offset:44; size:4; * field:int cpu; offset:48; size:4; * field:short error; offset:52; size:2; * field:short pdu_len; offset:54; size:2; * field:void data; offset:60; size:0; * * print fmt: "%d", REC->pid * * Note: the struct blk_io_trace is used directly and * just the first parts of the struct are not used in order * to not write over the ftrace data. */ /* Make sure the common fields exist */ field = tep_find_common_field(event, "common_type"); if (!field || field->offset != 0 || field->size != 2) goto fail; field = tep_find_common_field(event, "common_flags"); if (!field || field->offset != 2 || field->size != 1) goto fail; field = tep_find_common_field(event, "common_preempt_count"); if (!field || field->offset != 3 || field->size != 1) goto fail; field = tep_find_common_field(event, "common_pid"); if (!field || field->offset != 4 || field->size != 4) goto fail; r = sprintf(buf, blk_event_start, id); l = r; /* lock depth is optional */ field = tep_find_common_field(event, "common_lock_depth"); if (field) { if (field->offset != 8 || field->size != 4) return -1; r = sprintf(buf+l, "\tfield:int common_lock_depth;\toffset:8;\tsize:4;\n"); l += r; } r = sprintf(buf+l, blk_body); /* Parse this event */ l += r; tep_parse_event(pevent, buf, l, "ftrace"); return 0; fail: return -1; } trace-cmd-v3.3.1/lib/trace-cmd/trace-compress-zlib.c000066400000000000000000000043011470231550600222020ustar00rootroot00000000000000// SPDX-License-Identifier: LGPL-2.1 /* * 
Copyright (C) 2021, VMware, Tzvetomir Stoyanov tz.stoyanov@gmail.com> * */ #include #include #include #include #include "trace-cmd-private.h" #define __ZLIB_NAME "zlib" #define __ZLIB_WEIGTH 10 static int zlib_compress(void *ctx, const void *in, int in_bytes, void *out, int out_bytes) { unsigned long obytes = out_bytes; int ret; ret = compress2((unsigned char *)out, &obytes, (unsigned char *)in, (unsigned long)in_bytes, Z_BEST_COMPRESSION); switch (ret) { case Z_OK: return obytes; case Z_BUF_ERROR: errno = -ENOBUFS; break; case Z_MEM_ERROR: errno = -ENOMEM; break; case Z_STREAM_ERROR: errno = -EINVAL; break; case Z_ERRNO: break; default: errno = -EFAULT; break; } return -1; } static int zlib_decompress(void *ctx, const void *in, int in_bytes, void *out, int out_bytes) { unsigned long obytes = out_bytes; int ret; ret = uncompress((unsigned char *)out, &obytes, (unsigned char *)in, (unsigned long)in_bytes); switch (ret) { case Z_OK: return obytes; case Z_BUF_ERROR: errno = -ENOBUFS; break; case Z_MEM_ERROR: errno = -ENOMEM; break; case Z_DATA_ERROR: errno = -EINVAL; break; case Z_ERRNO: break; default: errno = -EFAULT; break; } return -1; } static unsigned int zlib_compress_bound(void *ctx, unsigned int in_bytes) { return compressBound(in_bytes); } static bool zlib_is_supported(const char *name, const char *version) { const char *zver; if (!name) return false; if (strlen(name) != strlen(__ZLIB_NAME) || strcmp(name, __ZLIB_NAME)) return false; if (!version) return true; zver = zlibVersion(); if (!zver) return false; /* Compare the major version number */ if (atoi(version) <= atoi(zver)) return true; return false; } int tracecmd_zlib_init(void) { struct tracecmd_compression_proto proto; memset(&proto, 0, sizeof(proto)); proto.name = __ZLIB_NAME; proto.version = zlibVersion(); proto.weight = __ZLIB_WEIGTH; proto.compress = zlib_compress; proto.uncompress = zlib_decompress; proto.is_supported = zlib_is_supported; proto.compress_size = zlib_compress_bound; return 
tracecmd_compress_proto_register(&proto); } trace-cmd-v3.3.1/lib/trace-cmd/trace-compress-zstd.c000066400000000000000000000045561470231550600222420ustar00rootroot00000000000000// SPDX-License-Identifier: LGPL-2.1 /* * Copyright (C) 2022, Sebastian Andrzej Siewior * */ #include #include #include #include "trace-cmd-private.h" #define __ZSTD_NAME "zstd" #define __ZSTD_WEIGTH 5 struct zstd_context { ZSTD_CCtx *ctx_c; ZSTD_DCtx *ctx_d; }; static int zstd_compress(void *ctx, const void *in, int in_bytes, void *out, int out_bytes) { struct zstd_context *context = ctx; size_t ret; if (!ctx) return -1; ret = ZSTD_compress2(context->ctx_c, out, out_bytes, in, in_bytes); if (ZSTD_isError(ret)) return -1; return ret; } static int zstd_decompress(void *ctx, const void *in, int in_bytes, void *out, int out_bytes) { struct zstd_context *context = ctx; size_t ret; if (!ctx) return -1; ret = ZSTD_decompressDCtx(context->ctx_d, out, out_bytes, in, in_bytes); if (ZSTD_isError(ret)) { errno = -EINVAL; return -1; } return ret; } static unsigned int zstd_compress_bound(void *ctx, unsigned int in_bytes) { return ZSTD_compressBound(in_bytes); } static bool zstd_is_supported(const char *name, const char *version) { if (!name) return false; if (strcmp(name, __ZSTD_NAME)) return false; return true; } static void *new_zstd_context(void) { struct zstd_context *context; size_t r; context = calloc(1, sizeof(*context)); if (!context) return NULL; context->ctx_c = ZSTD_createCCtx(); context->ctx_d = ZSTD_createDCtx(); if (!context->ctx_c || !context->ctx_d) goto err; r = ZSTD_CCtx_setParameter(context->ctx_c, ZSTD_c_contentSizeFlag, 0); if (ZSTD_isError(r)) goto err; return context; err: ZSTD_freeCCtx(context->ctx_c); ZSTD_freeDCtx(context->ctx_d); free(context); return NULL; } static void free_zstd_context(void *ctx) { struct zstd_context *context = ctx; if (!ctx) return; ZSTD_freeCCtx(context->ctx_c); ZSTD_freeDCtx(context->ctx_d); free(context); } int tracecmd_zstd_init(void) { struct 
tracecmd_compression_proto proto; memset(&proto, 0, sizeof(proto)); proto.name = __ZSTD_NAME; proto.version = ZSTD_versionString(); proto.weight = __ZSTD_WEIGTH; proto.compress = zstd_compress; proto.uncompress = zstd_decompress; proto.is_supported = zstd_is_supported; proto.compress_size = zstd_compress_bound; proto.new_context = new_zstd_context; proto.free_context = free_zstd_context; return tracecmd_compress_proto_register(&proto); } trace-cmd-v3.3.1/lib/trace-cmd/trace-compress.c000066400000000000000000000575371470231550600212670ustar00rootroot00000000000000// SPDX-License-Identifier: LGPL-2.1 /* * Copyright (C) 2021, VMware, Tzvetomir Stoyanov tz.stoyanov@gmail.com> * */ #include #include #include #include #include #include "trace-cmd-private.h" #include "trace-cmd-local.h" struct compress_proto { struct compress_proto *next; char *proto_name; char *proto_version; int weight; int (*compress_block)(void *ctx, const void *in, int in_bytes, void *out, int out_bytes); int (*uncompress_block)(void *ctx, const void *in, int in_bytes, void *out, int out_bytes); unsigned int (*compress_size)(void *ctx, unsigned int bytes); bool (*is_supported)(const char *name, const char *version); void *(*new_context)(void); void (*free_context)(void *ctx); }; static struct compress_proto *proto_list; struct tracecmd_compression { int fd; size_t capacity; size_t capacity_read; size_t pointer; char *buffer; struct compress_proto *proto; struct tep_handle *tep; struct tracecmd_msg_handle *msg_handle; void *context; }; static ssize_t read_fd(int fd, char *dst, int len) { size_t size = 0; ssize_t r; do { r = read(fd, dst+size, len); if (r > 0) { size += r; len -= r; } else break; } while (r > 0); if (len) return -1; return size; } static ssize_t write_fd(int fd, const void *data, size_t size) { ssize_t tot = 0; ssize_t w; do { w = write(fd, data + tot, size - tot); tot += w; if (!w) break; if (w < 0) return w; } while (tot != size); return tot; } static ssize_t do_write(struct 
tracecmd_compression *handle, const void *data, size_t size) { int ret; if (handle->msg_handle) { ret = tracecmd_msg_data_send(handle->msg_handle, data, size); if (ret) return -1; return size; } return write_fd(handle->fd, data, size); } static inline int buffer_extend(struct tracecmd_compression *handle, size_t size) { ssize_t extend; char *buf; if (size <= handle->capacity) return 0; extend = (size / BUFSIZ + 1) * BUFSIZ; buf = realloc(handle->buffer, extend); if (!buf) return -1; handle->buffer = buf; handle->capacity = extend; return 0; } /** * tracecmd_compress_lseek - Move the read/write pointer into the compression buffer * @handle: compression handle * @offset: number of bytes to move the pointer, can be negative or positive * @whence: the starting position of the pointer movement, * * Returns the new file pointer on success, or -1 in case of an error. */ off_t tracecmd_compress_lseek(struct tracecmd_compression *handle, off_t offset, int whence) { unsigned long p; if (!handle || !handle->buffer) return (off_t)-1; switch (whence) { case SEEK_CUR: p = handle->pointer + offset; break; case SEEK_END: p = handle->capacity + offset; break; case SEEK_SET: p = offset; break; default: return (off_t)-1; } if (buffer_extend(handle, p)) return (off_t)-1; handle->pointer = p; return p; } static ssize_t compress_read(struct tracecmd_compression *handle, char *dst, size_t len) { if (handle->pointer > handle->capacity_read) return -1; if (handle->pointer + len > handle->capacity_read) len = handle->capacity_read - handle->pointer; memcpy(dst, handle->buffer + handle->pointer, len); return len; } /** * tracecmd_compress_pread - pread() on compression buffer * @handle: compression handle * @dst: return, store the read data * @len: length of data to be read * @offset: offset in the buffer of data to be read * * Read a @len of data from the compression buffer at given @offset, * without updating the buffer pointer. 
* * On success returns the number of bytes read, or -1 on failure. */ ssize_t tracecmd_compress_pread(struct tracecmd_compression *handle, char *dst, size_t len, off_t offset) { ssize_t ret; if (!handle || !handle->buffer || offset > handle->capacity_read) return -1; ret = tracecmd_compress_lseek(handle, offset, SEEK_SET); if (ret < 0) return ret; return compress_read(handle, dst, len); } /** * tracecmd_compress_buffer_read - read() from compression buffer * @handle: compression handle * @dst: return, store the read data * @len: length of data to be read * * Read a @len of data from the compression buffer * * On success returns the number of bytes read, or -1 on failure. */ ssize_t tracecmd_compress_buffer_read(struct tracecmd_compression *handle, char *dst, size_t len) { ssize_t ret; if (!handle || !handle->buffer) return -1; ret = compress_read(handle, dst, len); if (ret > 0) handle->pointer += ret; return ret; } /** * tracecmd_compress_reset - Reset the compression buffer * @handle: compression handle * * Reset the compression buffer, any data currently in the buffer * will be destroyed. * */ void tracecmd_compress_reset(struct tracecmd_compression *handle) { if (!handle) return; free(handle->buffer); handle->buffer = NULL; handle->pointer = 0; handle->capacity_read = 0; handle->capacity = 0; } /** * tracecmd_uncompress_block - uncompress a memory block * @handle: compression handle * * Read compressed memory block from the file and uncompress it into * internal buffer. The tracecmd_compress_buffer_read() can be used * to read the uncompressed data from the buffer. * * Returns 0 on success, or -1 in case of an error. 
*/
int tracecmd_uncompress_block(struct tracecmd_compression *handle)
{
	/* The codec hooks take the sizes as int, bigger blocks cannot be handled */
	const unsigned int max_size = ~0U >> 1;
	unsigned int s_uncompressed;
	unsigned int s_compressed;
	char *bytes = NULL;
	char buf[4];
	int size;
	int ret;

	if (!handle || !handle->proto || !handle->proto->uncompress_block)
		return -1;

	/* Drop whatever is in the buffer, it is replaced by the new block */
	tracecmd_compress_reset(handle);

	/* The block starts with two 4 byte numbers: compressed and uncompressed size */
	if (read_fd(handle->fd, buf, 4) < 0)
		return -1;

	s_compressed = tep_read_number(handle->tep, buf, 4);
	if (read_fd(handle->fd, buf, 4) < 0)
		return -1;

	s_uncompressed = tep_read_number(handle->tep, buf, 4);

	/* The sizes come from the file, do not trust them to fit into an int */
	if (s_compressed > max_size || s_uncompressed > max_size)
		return -1;

	/* The buffer is sized for the bigger of the two */
	size = s_uncompressed > s_compressed ? s_uncompressed : s_compressed;

	handle->buffer = malloc(size);
	if (!handle->buffer)
		return -1;

	bytes = malloc(s_compressed);
	if (!bytes)
		goto error;

	if (read_fd(handle->fd, bytes, s_compressed) < 0)
		goto error;

	ret = handle->proto->uncompress_block(handle->context,
					      bytes, s_compressed, handle->buffer, size);
	if (ret < 0)
		goto error;

	free(bytes);
	handle->pointer = 0;
	/* Only the bytes produced by the codec are valid for reading */
	handle->capacity_read = ret;
	handle->capacity = size;
	return 0;
error:
	tracecmd_compress_reset(handle);
	free(bytes);
	return -1;
}

/**
 * tracecmd_compress_block - compress a memory block
 * @handle: compression handle
 *
 * Compress the content of the internal memory buffer and write
 * the compressed data in the file. The tracecmd_compress_buffer_write()
 * can be used to write data into the internal memory buffer,
 * before calling this API.
 *
 * Returns 0 on success, or -1 in case of an error.
*/
int tracecmd_compress_block(struct tracecmd_compression *handle)
{
	unsigned int usize;
	unsigned int size;
	char *buf;
	int endian4;
	int zsize;
	int ret = -1;

	if (!handle || !handle->proto ||
	    !handle->proto->compress_size || !handle->proto->compress_block)
		return -1;

	/* Ask the codec for the worst case size of the compressed output */
	size = handle->proto->compress_size(handle->context, handle->pointer);

	buf = malloc(size);
	if (!buf)
		return -1;

	/*
	 * The hook returns a negative value on error, so the result must be
	 * kept in a signed variable for the check to work.
	 */
	zsize = handle->proto->compress_block(handle->context,
					      handle->buffer, handle->pointer, buf, size);
	if (zsize < 0)
		goto out;

	/*
	 * The sizes are stored as 4 byte numbers. They are copied into a
	 * 4 byte variable first, so that exactly those bytes are converted,
	 * regardless of the width and byte order of size_t on the host.
	 */

	/* Write compressed data size */
	usize = zsize;
	endian4 = tep_read_number(handle->tep, &usize, 4);
	if (do_write(handle, &endian4, 4) != 4)
		goto out;

	/* Write uncompressed data size */
	usize = handle->pointer;
	endian4 = tep_read_number(handle->tep, &usize, 4);
	if (do_write(handle, &endian4, 4) != 4)
		goto out;

	/* Write compressed data */
	if (do_write(handle, buf, zsize) != zsize)
		goto out;

	ret = 0;
	/* The block is in the file, the buffer can be reused for the next one */
	tracecmd_compress_reset(handle);

out:
	free(buf);
	return ret;
}

/**
 * tracecmd_compress_buffer_write - write() to compression buffer
 * @handle: compression handle
 * @data: data to be written
 * @size: size of @data
 *
 * Write @data of @size in the compression buffer
 *
 * Returns 0 on success, or -1 on failure.
*/ int tracecmd_compress_buffer_write(struct tracecmd_compression *handle, const void *data, size_t size) { if (!handle) return -1; if (buffer_extend(handle, handle->pointer + size)) return -1; memcpy(&handle->buffer[handle->pointer], data, size); handle->pointer += size; if (handle->capacity_read < handle->pointer) handle->capacity_read = handle->pointer; return 0; } /** * tracecmd_compress_init - initialize the library with available compression algorithms */ void tracecmd_compress_init(void) { struct timeval time; gettimeofday(&time, NULL); srand((time.tv_sec * 1000) + (time.tv_usec / 1000)); #ifdef HAVE_ZLIB tracecmd_zlib_init(); #endif tracecmd_zstd_init(); } static struct compress_proto *compress_proto_select(void) { struct compress_proto *proto = proto_list; struct compress_proto *selected = NULL; while (proto) { if (!selected || selected->weight > proto->weight) selected = proto; proto = proto->next; } return selected; } /** * tracecmd_compress_alloc - Allocate a new compression context * @name: name of the compression algorithm. * If NULL - auto select the best available algorithm * @version: version of the compression algorithm, can be NULL * @fd: file descriptor for reading / writing data * @tep: tep handle, used to encode the data * @msg_handle: message handle, use it for reading / writing data instead of @fd * * Returns NULL on failure or pointer to allocated compression context. 
* The returned context must be freed by tracecmd_compress_destroy() */ struct tracecmd_compression *tracecmd_compress_alloc(const char *name, const char *version, int fd, struct tep_handle *tep, struct tracecmd_msg_handle *msg_handle) { struct tracecmd_compression *new; struct compress_proto *proto; if (name) { proto = proto_list; while (proto) { if (proto->is_supported && proto->is_supported(name, version)) break; proto = proto->next; } } else { proto = compress_proto_select(); } if (!proto) return NULL; new = calloc(1, sizeof(*new)); if (!new) return NULL; new->fd = fd; new->tep = tep; new->msg_handle = msg_handle; new->proto = proto; if (proto->new_context) new->context = proto->new_context(); return new; } /** * tracecmd_compress_destroy - Free a compression context * @handle: handle to the compression context that will be freed */ void tracecmd_compress_destroy(struct tracecmd_compression *handle) { if (!handle) return; tracecmd_compress_reset(handle); if (handle->proto && handle->proto->free_context) handle->proto->free_context(handle->context); free(handle); } /** * tracecmd_compress_is_supported - check if compression algorithm is supported * @name: name of the compression algorithm. * @version: version of the compression algorithm. * * Checks if compression algorithm with given name and version is supported. * Returns true if the algorithm is supported or false if it is not. */ bool tracecmd_compress_is_supported(const char *name, const char *version) { struct compress_proto *proto = proto_list; if (!name) return NULL; while (proto) { if (proto->is_supported && proto->is_supported(name, version)) return true; proto = proto->next; } return false; } /** * tracecmd_compress_proto_get_name - get name and version of compression algorithm * @compress: compression handle. * @name: return, name of the compression algorithm. * @version: return, version of the compression algorithm. * * Returns 0 on success, or -1 in case of an error. 
* If 0 is returned, the name
 * and version of the algorithm are stored in @name and @version. The returned
 * strings must *not* be freed. Either of @name and @version can be NULL, if
 * the caller is not interested in it.
 */
int tracecmd_compress_proto_get_name(struct tracecmd_compression *compress,
				     const char **name, const char **version)
{
	struct compress_proto *proto;

	if (!compress)
		return -1;

	proto = compress->proto;
	if (!proto)
		return -1;

	/* The strings are owned by the registered algorithm, they are not copied */
	if (name)
		*name = proto->proto_name;
	if (version)
		*version = proto->proto_version;

	return 0;
}

/**
 * tracecmd_compress_proto_register - register a new compression algorithm
 * @proto: description of the algorithm, with the fields:
 *	name: name of the compression algorithm.
 *	version: version of the compression algorithm.
 *	weight: weight of the compression algorithm, lower is better.
 *	compress: compression hook, called to compress a memory block.
 *	uncompress: uncompression hook, called to uncompress a memory block.
 *	compress_size: hook, called to get the required minimum size of the buffer
 *		       for compression given number of bytes.
 *	is_supported: check hook, called to check if compression with given name and
 *		      version is supported by this plugin.
 *	new_context: hook, called to allocate the context passed to the other hooks.
 *	free_context: hook, called to free the context.
 *
 * Returns 0 on success, or -1 in case of an error. If algorithm with given name
 * and version is already registered, -1 is returned.
*/ int tracecmd_compress_proto_register(struct tracecmd_compression_proto *proto) { struct compress_proto *new; if (!proto || !proto->name || !proto->compress || !proto->uncompress) return -1; if (tracecmd_compress_is_supported(proto->name, proto->version)) return -1; new = calloc(1, sizeof(*new)); if (!new) return -1; new->proto_name = strdup(proto->name); if (!new->proto_name) goto error; new->proto_version = strdup(proto->version); if (!new->proto_version) goto error; new->compress_block = proto->compress; new->uncompress_block = proto->uncompress; new->compress_size = proto->compress_size; new->is_supported = proto->is_supported; new->weight = proto->weight; new->next = proto_list; new->new_context = proto->new_context; new->free_context = proto->free_context; proto_list = new; return 0; error: free(new->proto_name); free(new->proto_version); free(new); return -1; } /** * tracecmd_compress_free - free the library resources, related to available compression algorithms * */ void tracecmd_compress_free(void) { struct compress_proto *proto = proto_list; struct compress_proto *del; while (proto) { del = proto; proto = proto->next; free(del->proto_name); free(del->proto_version); free(del); } proto_list = NULL; } /** * tracecmd_compress_protos_get - get a list of all supported compression algorithms and versions * @names: return, array with names of all supported compression algorithms * @versions: return, array with versions of all supported compression algorithms * * On success, the size of @names and @versions arrays is returned. * Those arrays are allocated by the API and must be freed with free() by the * caller. Both arrays are with same size, each name from @names corresponds to * a version from @versions. The last element in both arrays is a NULL pointer. * On error -1 is returned and @names and @versions arrays are not allocated. 
*/
int tracecmd_compress_protos_get(char ***names, char ***versions)
{
	struct compress_proto *proto;
	char **name_list = NULL;
	char **ver_list = NULL;
	int count = 0;
	int i = 0;

	for (proto = proto_list; proto; proto = proto->next)
		count++;

	/* Nothing is registered, nothing is allocated */
	if (count < 1)
		return count;

	/* One extra element in each array for the terminating NULL */
	name_list = calloc(count + 1, sizeof(char *));
	if (!name_list)
		goto error;
	ver_list = calloc(count + 1, sizeof(char *));
	if (!ver_list)
		goto error;

	/* The arrays point to the strings of the registered algorithms, no copies are made */
	proto = proto_list;
	while (i < count && proto) {
		name_list[i] = proto->proto_name;
		ver_list[i] = proto->proto_version;
		proto = proto->next;
		i++;
	}
	name_list[i] = NULL;
	ver_list[i] = NULL;

	*names = name_list;
	*versions = ver_list;
	return count;

error:
	free(name_list);
	free(ver_list);
	return -1;
}

/**
 * tracecmd_compress_copy_from - Copy and compress data from a file
 * @handle: compression handle
 * @fd: file descriptor to uncompressed data to copy from
 * @chunk_size: size of one compression chunk
 * @read_size: Pointer to max bytes to read from. The pointer is updated
 *	       with the actual number of bytes read from @fd. If 0 is passed,
 *	       read until the EOF is reached.
 * @write_size: return, size of the compressed data written into @handle
 *
 * This function reads uncompressed data from given @fd, compresses the data
 * using the @handle compression context and writes the compressed data into the
 * fd associated with the @handle. The data is compressed on chunks with given
 * @chunk_size size. The compressed data is written in the format:
 *  - 4 bytes, chunks count
 *  - for each chunk:
 *    - 4 bytes, size of compressed data in this chunk
 *    - 4 bytes, uncompressed size of the data in this chunk
 *    - data, the compressed bytes of this chunk
 *
 * On success 0 is returned, @read_size and @write_size are updated with the size of
 * read and written data.
*/ int tracecmd_compress_copy_from(struct tracecmd_compression *handle, int fd, int chunk_size, size_t *read_size, size_t *write_size) { size_t rchunk = 0; size_t chunks = 0; size_t rsize = 0; size_t rmax = 0; size_t csize; size_t size; size_t all; size_t r; off_t end_offset; off_t offset; char *buf_from; char *buf_to; int endian4; int ret; if (!handle || !handle->proto || !handle->proto->compress_block || !handle->proto->compress_size) return 0; if (read_size) rmax = *read_size; csize = handle->proto->compress_size(handle->context, chunk_size); buf_from = malloc(chunk_size); if (!buf_from) return -1; buf_to = malloc(csize); if (!buf_to) { free(buf_from); return -1; } /* save the initial offset and write 0 as initial chunk count */ offset = lseek(handle->fd, 0, SEEK_CUR); write_fd(handle->fd, &chunks, 4); do { all = 0; if (rmax > 0 && (rmax - rsize) < chunk_size) rchunk = (rmax - rsize); else rchunk = chunk_size; do { r = read(fd, buf_from + all, rchunk - all); if (r <= 0) break; all += r; } while (all != rchunk); if (r < 0 || (rmax > 0 && rsize >= rmax)) break; rsize += all; size = csize; if (all > 0) { ret = handle->proto->compress_block(handle->context, buf_from, all, buf_to, size); if (ret < 0) { if (errno == EINTR) continue; break; } size = ret; /* Write compressed data size */ endian4 = tep_read_number(handle->tep, &size, 4); ret = write_fd(handle->fd, &endian4, 4); if (ret != 4) break; /* Write uncompressed data size */ endian4 = tep_read_number(handle->tep, &all, 4); ret = write_fd(handle->fd, &endian4, 4); if (ret != 4) break; /* Write the compressed data */ ret = write_fd(handle->fd, buf_to, size); if (ret != size) break; chunks++; } } while (all > 0); free(buf_from); free(buf_to); if (all) return -1; if (lseek(handle->fd, offset, SEEK_SET) == (off_t)-1) return -1; endian4 = tep_read_number(handle->tep, &chunks, 4); /* write chunks count*/ write_fd(handle->fd, &chunks, 4); end_offset = lseek(handle->fd, 0, SEEK_END); if (end_offset == (off_t)-1) return 
-1; if (read_size) *read_size = rsize; if (write_size) *write_size = end_offset - offset; return 0; } /** * tracecmd_load_chunks_info - Read compression chunks information from the file * @handle: compression handle * @chunks_info: return, array with compression chunks information * * This function reads information of all compression chunks in the current * compression block from the file and fills that information in a newly * allocated array @chunks_info which is returned. * * On success count of compression chunks is returned. Array of that count is * allocated and returned in @chunks_info. Each entry describes one compression * chunk. On error -1 is returned. In case of success, @chunks_info must be * freed by free(). */ int tracecmd_load_chunks_info(struct tracecmd_compression *handle, struct tracecmd_compress_chunk **chunks_info) { struct tracecmd_compress_chunk *chunks = NULL; size_t size = 0; unsigned int count = 0; off_t offset; int ret = -1; char buf[4]; int i; if (!handle) return -1; offset = lseek(handle->fd, 0, SEEK_CUR); if (offset == (off_t)-1) return -1; if (read(handle->fd, buf, 4) != 4) return -1; count = tep_read_number(handle->tep, buf, 4); if (!count) { ret = 0; goto out; } chunks = calloc(count, sizeof(struct tracecmd_compress_chunk)); if (!chunks) goto out; for (i = 0; i < count; i++) { chunks[i].zoffset = lseek(handle->fd, 0, SEEK_CUR); if (chunks[i].zoffset == (off_t)-1) goto out; if (read(handle->fd, buf, 4) != 4) goto out; chunks[i].zsize = tep_read_number(handle->tep, buf, 4); chunks[i].offset = size; if (read(handle->fd, buf, 4) != 4) goto out; chunks[i].size = tep_read_number(handle->tep, buf, 4); size += chunks[i].size; if (lseek(handle->fd, chunks[i].zsize, SEEK_CUR) == (off_t)-1) goto out; } ret = count; out: if (lseek(handle->fd, offset, SEEK_SET) == (off_t)-1) ret = -1; if (ret > 0 && chunks_info) *chunks_info = chunks; else free(chunks); return ret; } /** * tracecmd_uncompress_chunk - Uncompress given compression chunk. 
* @handle: compression handle * @chunk: chunk, that will be uncompressed in @data * @data: Preallocated memory for uncompressed data. Must have enough space * to hold the uncompressed data. * * This function uncompresses the chunk described by @chunk and stores * the uncompressed data in the preallocated memory @data. * * On success 0 is returned and the uncompressed data is stored in @data. * On error -1 is returned. */ int tracecmd_uncompress_chunk(struct tracecmd_compression *handle, struct tracecmd_compress_chunk *chunk, char *data) { char *bytes_in = NULL; int ret = -1; if (!handle || !handle->proto || !handle->proto->uncompress_block || !chunk || !data) return -1; if (lseek(handle->fd, chunk->zoffset + 8, SEEK_SET) == (off_t)-1) return -1; bytes_in = malloc(chunk->zsize); if (!bytes_in) return -1; if (read_fd(handle->fd, bytes_in, chunk->zsize) < 0) goto out; if (handle->proto->uncompress_block(handle->context, bytes_in, chunk->zsize, data, chunk->size) < 0) goto out; ret = 0; out: free(bytes_in); return ret; } /** * tracecmd_uncompress_copy_to - Uncompress data and copy to a file * @handle: compression handle * @fd: file descriptor to uncompressed data to copy into * @read_size: return, size of the compressed data read from @handle * @write_size: return, size of the uncompressed data written into @fd * * This function reads compressed data from the fd, associated with @handle, * uncompresses it using the @handle compression context and writes * the uncompressed data into the fd. The compressed data must be in the format: * - 4 bytes, chunks count * - for each chunk: * - 4 bytes, size of compressed data in this chunk * - 4 bytes, uncompressed size of the data in this chunk * - data, bytes of * * On success 0 is returned, @read_size and @write_size are updated with * the size of read and written data. 
*/ int tracecmd_uncompress_copy_to(struct tracecmd_compression *handle, int fd, size_t *read_size, size_t *write_size) { size_t s_uncompressed; size_t s_compressed; size_t rsize = 0; size_t wsize = 0; char *bytes_out = NULL; char *bytes_in = NULL; int size_out = 0; int size_in = 0; int chunks; char buf[4]; char *tmp; int ret; if (!handle || !handle->proto || !handle->proto->uncompress_block) return -1; if (read(handle->fd, buf, 4) != 4) return -1; chunks = tep_read_number(handle->tep, buf, 4); rsize += 4; while (chunks) { if (read(handle->fd, buf, 4) != 4) break; s_compressed = tep_read_number(handle->tep, buf, 4); rsize += 4; if (read(handle->fd, buf, 4) != 4) break; s_uncompressed = tep_read_number(handle->tep, buf, 4); rsize += 4; if (!bytes_in || size_in < s_compressed) { tmp = realloc(bytes_in, s_compressed); if (!tmp) break; bytes_in = tmp; size_in = s_compressed; } if (!bytes_out || size_out < s_uncompressed) { tmp = realloc(bytes_out, s_uncompressed); if (!tmp) break; bytes_out = tmp; size_out = s_uncompressed; } if (read_fd(handle->fd, bytes_in, s_compressed) < 0) break; rsize += s_compressed; ret = handle->proto->uncompress_block(handle->context, bytes_in, s_compressed, bytes_out, s_uncompressed); if (ret < 0) break; write_fd(fd, bytes_out, ret); wsize += ret; chunks--; } free(bytes_in); free(bytes_out); if (chunks) return -1; if (read_size) *read_size = rsize; if (write_size) *write_size = wsize; return 0; } trace-cmd-v3.3.1/lib/trace-cmd/trace-filter-hash.c000066400000000000000000000076551470231550600216360ustar00rootroot00000000000000// SPDX-License-Identifier: LGPL-2.1 /* * Copyright (C) 2009, Steven Rostedt * Copyright (C) 2018 VMware Inc, Steven Rostedt * */ #include #include #include #include #include #include "trace-filter-hash.h" #define FILTER_HASH_BITS 8 #define FILTER_HASH_SIZE (1 << FILTER_HASH_BITS) struct tracecmd_filter_id_item * tracecmd_filter_id_find(struct tracecmd_filter_id *hash, int id) { int key = tracecmd_quick_hash(id, 
FILTER_HASH_BITS); struct tracecmd_filter_id_item *item = hash->hash[key]; while (item) { if (item->id == id) break; item = item->next; } return item; } void tracecmd_filter_id_add(struct tracecmd_filter_id *hash, int id) { int key = tracecmd_quick_hash(id, FILTER_HASH_BITS); struct tracecmd_filter_id_item *item; item = calloc(1, sizeof(*item)); assert(item); item->id = id; item->next = hash->hash[key]; hash->hash[key] = item; hash->count++; } void tracecmd_filter_id_remove(struct tracecmd_filter_id *hash, int id) { int key = tracecmd_quick_hash(id, FILTER_HASH_BITS); struct tracecmd_filter_id_item **next = &hash->hash[key]; struct tracecmd_filter_id_item *item; while (*next) { if ((*next)->id == id) break; next = &(*next)->next; } if (!*next) return; assert(hash->count); hash->count--; item = *next; *next = item->next; free(item); } void tracecmd_filter_id_clear(struct tracecmd_filter_id *hash) { struct tracecmd_filter_id_item *item, *next; int i; for (i = 0; i < FILTER_HASH_SIZE; i++) { next = hash->hash[i]; if (!next) continue; hash->hash[i] = NULL; while (next) { item = next; next = item->next; free(item); } } hash->count = 0; } struct tracecmd_filter_id *tracecmd_filter_id_hash_alloc(void) { struct tracecmd_filter_id *hash; hash = calloc(1, sizeof(*hash)); assert(hash); hash->hash = calloc(FILTER_HASH_SIZE, sizeof(*hash->hash)); hash->count = 0; return hash; } void tracecmd_filter_id_hash_free(struct tracecmd_filter_id *hash) { if (!hash) return; tracecmd_filter_id_clear(hash); free(hash->hash); free(hash); } struct tracecmd_filter_id * tracecmd_filter_id_hash_copy(struct tracecmd_filter_id *hash) { struct tracecmd_filter_id *new_hash; struct tracecmd_filter_id_item *item, **pitem; int i; if (!hash) return NULL; new_hash = tracecmd_filter_id_hash_alloc(); assert(new_hash); for (i = 0; i < FILTER_HASH_SIZE; i++) { item = hash->hash[i]; if (!item) continue; pitem = &new_hash->hash[i]; while (item) { *pitem = calloc(1, sizeof(*item)); assert(*pitem); **pitem = 
*item; pitem = &(*pitem)->next; item = item->next; } } new_hash->count = hash->count; return new_hash; } int *tracecmd_filter_ids(struct tracecmd_filter_id *hash) { struct tracecmd_filter_id_item *item; int *ids; int count = 0; int i; if (!hash->count) return NULL; ids = malloc(sizeof(*ids) * (hash->count + 1)); if (!ids) return NULL; for (i = 0; i < FILTER_HASH_SIZE; i++) { item = hash->hash[i]; while (item) { ids[count++] = item->id; item = item->next; } } ids[count] = -1; return ids; } /** * filter_id_compare - compare two id hashes to see if they are equal * @hash1: one hash to compare * @hash2: another hash to compare to @hash1 * * Returns 1 if the two hashes are the same, 0 otherwise. */ int tracecmd_filter_id_compare(struct tracecmd_filter_id *hash1, struct tracecmd_filter_id *hash2) { int *ids; int ret = 0; int i; /* If counts don't match, then they obviously are not the same */ if (hash1->count != hash2->count) return 0; /* If both hashes are empty, they are the same */ if (!hash1->count && !hash2->count) return 1; /* Now compare the pids of one hash with the other */ ids = tracecmd_filter_ids(hash1); for (i = 0; ids[i] >= 0; i++) { if (!tracecmd_filter_id_find(hash2, ids[i])) break; } if (ids[i] == -1) ret = 1; free(ids); return ret; } trace-cmd-v3.3.1/lib/trace-cmd/trace-filter.c000066400000000000000000000111321470231550600206760ustar00rootroot00000000000000// SPDX-License-Identifier: LGPL-2.1 /* * Copyright (C) 2022, Google Inc, Steven Rostedt */ #include #include #include struct filter { struct tep_event_filter *filter; }; struct tracecmd_filter { struct tep_handle *tep; struct filter **event_filters; struct filter **event_notrace; bool *last_printed; int nr_cpus; int nr_filters; int nr_notrace; int kernel_stacktrace_id; int user_stacktrace_id; }; static bool test_stacktrace(struct tracecmd_filter *filter, struct tep_record *record, int stacktrace_id) { struct tep_handle *tep = filter->tep; int id; if (stacktrace_id < 0) return false; id = 
tep_data_type(tep, record); if (id != stacktrace_id) return false; return filter->last_printed[record->cpu]; } static bool test_stacktraces(struct tracecmd_filter *filter, struct tep_record *record) { return test_stacktrace(filter, record, filter->kernel_stacktrace_id) || test_stacktrace(filter, record, filter->user_stacktrace_id); } __hidden enum tracecmd_filters tracecmd_filter_match(struct tracecmd_filter *filter, struct tep_record *record) { bool is_stack = false; bool found = false; int ret; int i; if (!filter) return TRACECMD_FILTER_NONE; /* Setup stack traces. If a event is shown, still show stack traces */ if (!filter->kernel_stacktrace_id) { struct tep_handle *tep = filter->tep; struct tep_event *event; /* In case the below logic fails, do not do this again */ filter->kernel_stacktrace_id = -1; event = tep_find_event_by_name(tep, "ftrace", "kernel_stack"); if (event) filter->kernel_stacktrace_id = event->id; event = tep_find_event_by_name(tep, "ftrace", "user_stack"); if (event) filter->user_stacktrace_id = event->id; filter->nr_cpus = tep_get_cpus(tep); filter->last_printed = calloc(filter->nr_cpus, sizeof(*filter->last_printed)); if (!filter->last_printed) { tracecmd_warning("Could not allocate last_printed array for stack trace filtering"); filter->kernel_stacktrace_id = -1; filter->user_stacktrace_id = -1; } } for (i = 0; i < filter->nr_filters; i++) { ret = tep_filter_match(filter->event_filters[i]->filter, record); switch (ret) { case TRACECMD_FILTER_NONE: case TRACECMD_FILTER_MATCH: found = true; } if (found) break; } if (!found && filter->nr_filters) { /* If this is a stack trace and the last event was printed continue */ if (!test_stacktraces(filter, record)) return TRACECMD_FILTER_MISS; is_stack = true; } found = false; /* We need to test all negative filters */ for (i = 0; i < filter->nr_notrace; i++) { ret = tep_filter_match(filter->event_notrace[i]->filter, record); switch (ret) { case TRACECMD_FILTER_NONE: case TRACECMD_FILTER_MATCH: found = 
true; } if (found) break; } if (filter->last_printed) filter->last_printed[record->cpu] = !is_stack && !found; return found ? TRACECMD_FILTER_MISS : TRACECMD_FILTER_MATCH; } struct tracecmd_filter *tracecmd_filter_add(struct tracecmd_input *handle, const char *filter_str, bool neg) { struct tracecmd_filter *trace_filter; struct tep_handle *tep; struct filter ***filter_ptr; struct filter **filters; struct filter *filter; int *nr; int ret; filter = calloc(1, sizeof(*filter)); if (!filter) return NULL; tep = tracecmd_get_tep(handle); trace_filter = tracecmd_filter_get(handle); if (!trace_filter) { trace_filter = calloc(1, sizeof(*trace_filter)); if (!trace_filter) goto fail; tracecmd_filter_set(handle, trace_filter); trace_filter->tep = tep; } filter->filter = tep_filter_alloc(tep); if (!filter->filter) goto fail; ret = tep_filter_add_filter_str(filter->filter, filter_str); if (ret < 0) goto fail; if (neg) { filter_ptr = &trace_filter->event_notrace; nr = &trace_filter->nr_notrace; } else { filter_ptr = &trace_filter->event_filters; nr = &trace_filter->nr_filters; } filters = realloc(*filter_ptr, sizeof(*filters) * (*nr + 1)); if (!filters) goto fail; *filter_ptr = filters; filters[*nr] = filter; (*nr)++; return trace_filter; fail: if (filter) { tep_filter_free(filter->filter); free(filter); } return NULL; } static void free_filters (struct filter **filter, int nr) { int i; for (i = 0; i < nr; i++) { tep_filter_free(filter[i]->filter); free(filter[i]); } free(filter); } __hidden void tracecmd_filter_free(struct tracecmd_filter *trace_filter) { if (!trace_filter) return; free_filters(trace_filter->event_filters, trace_filter->nr_filters); free_filters(trace_filter->event_notrace, trace_filter->nr_notrace); free(trace_filter); } trace-cmd-v3.3.1/lib/trace-cmd/trace-ftrace.c000066400000000000000000000277621470231550600206750ustar00rootroot00000000000000// SPDX-License-Identifier: LGPL-2.1 /* * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt * */ #include #include 
#include #include #include "trace-cmd-private.h" #define MAX_LINUX_ERRNO 4095 #define IS_LINUX_ERR_VALUE(x) ((unsigned long long)(void *)(x) >= (unsigned long long)-MAX_LINUX_ERRNO) struct tep_plugin_option trace_ftrace_options[] = { { .name = "tailprint", .plugin_alias = "fgraph", .description = "Print function name at function exit in function graph", }, { .name = "depth", .plugin_alias = "fgraph", .description = "Show the depth of each entry", }, { .name = "retval-skip", .plugin_alias = "fgraph", .description = "Skip printing function retval in function graph", }, { .name = "retval-dec", .plugin_alias = "fgraph", .description = "Print function retval in decimal at function exit in function graph", }, { .name = "retval-hex", .plugin_alias = "fgraph", .description = "Print function retval in hex at function exit in function graph", }, { .name = NULL, } }; static struct tep_plugin_option *fgraph_tail = &trace_ftrace_options[0]; static struct tep_plugin_option *fgraph_depth = &trace_ftrace_options[1]; static struct tep_plugin_option *fgraph_retval_skip = &trace_ftrace_options[2]; static struct tep_plugin_option *fgraph_retval_dec = &trace_ftrace_options[3]; static struct tep_plugin_option *fgraph_retval_hex = &trace_ftrace_options[4]; static int find_ret_event(struct tracecmd_ftrace *finfo, struct tep_handle *pevent) { struct tep_event *event; /* Store the func ret id and event for later use */ event = tep_find_event_by_name(pevent, "ftrace", "funcgraph_exit"); if (!event) return -1; finfo->fgraph_ret_id = event->id; finfo->fgraph_ret_event = event; return 0; } #define ret_event_check(finfo, pevent) \ do { \ if (!finfo->fgraph_ret_event && find_ret_event(finfo, pevent) < 0) \ return -1; \ } while (0) static int function_handler(struct trace_seq *s, struct tep_record *record, struct tep_event *event, void *context) { struct tep_handle *pevent = event->tep; unsigned long long function; const char *func; if (tep_get_field_val(s, event, "ip", record, &function, 1)) 
return trace_seq_putc(s, '!'); func = tep_find_function(pevent, function); if (func) trace_seq_printf(s, "%s <-- ", func); else trace_seq_printf(s, "0x%llx", function); if (tep_get_field_val(s, event, "parent_ip", record, &function, 1)) return trace_seq_putc(s, '!'); func = tep_find_function(pevent, function); if (func) trace_seq_printf(s, "%s", func); else trace_seq_printf(s, "0x%llx", function); return 0; } #define TRACE_GRAPH_INDENT 2 static struct tep_record * get_return_for_leaf(struct trace_seq *s, int cpu, int cur_pid, unsigned long long cur_func, struct tep_record *next, struct tracecmd_ftrace *finfo) { unsigned long long val; unsigned long long type; unsigned long long pid; /* Searching a common field, can use any event */ if (tep_get_common_field_val(s, finfo->fgraph_ret_event, "common_type", next, &type, 1)) return NULL; if (type != finfo->fgraph_ret_id) return NULL; if (tep_get_common_field_val(s, finfo->fgraph_ret_event, "common_pid", next, &pid, 1)) return NULL; if (cur_pid != pid) return NULL; /* We aleady know this is a funcgraph_ret_event */ if (tep_get_field_val(s, finfo->fgraph_ret_event, "func", next, &val, 1)) return NULL; if (cur_func != val) return NULL; /* this is a leaf, now advance the iterator */ return tracecmd_read_data(tracecmd_curr_thread_handle, cpu); } /* Signal a overhead of time execution to the output */ static void print_graph_overhead(struct trace_seq *s, unsigned long long duration) { /* Non nested entry or return */ if (duration == ~0ULL) return (void)trace_seq_printf(s, " "); /* Duration exceeded 1 sec */ if (duration > 1000000000ULL) return (void)trace_seq_printf(s, "$ "); /* Duration exceeded 1000 usecs */ if (duration > 1000000ULL) return (void)trace_seq_printf(s, "# "); /* Duration exceeded 100 usecs */ if (duration > 100000ULL) return (void)trace_seq_printf(s, "! 
"); /* Duration exceeded 10 usecs */ if (duration > 10000ULL) return (void)trace_seq_printf(s, "+ "); trace_seq_printf(s, " "); } static void print_graph_duration(struct trace_seq *s, unsigned long long duration) { unsigned long usecs = duration / 1000; unsigned long nsecs_rem = duration % 1000; /* log10(ULONG_MAX) + '\0' */ char msecs_str[21]; char nsecs_str[5]; int len; int i; sprintf(msecs_str, "%lu", usecs); /* Print msecs */ len = s->len; trace_seq_printf(s, "%lu", usecs); /* Print nsecs (we don't want to exceed 7 numbers) */ if ((s->len - len) < 7) { snprintf(nsecs_str, MIN(sizeof(nsecs_str), 8 - len), "%03lu", nsecs_rem); trace_seq_printf(s, ".%s", nsecs_str); } len = s->len - len; trace_seq_puts(s, " us "); /* Print remaining spaces to fit the row's width */ for (i = len; i < 7; i++) trace_seq_putc(s, ' '); trace_seq_puts(s, "| "); } static int print_graph_entry_leaf(struct trace_seq *s, struct tep_event *event, struct tep_record *record, struct tep_record *ret_rec, struct tracecmd_ftrace *finfo) { struct tep_handle *pevent = event->tep; unsigned long long rettime, calltime; unsigned long long duration, depth; unsigned long long val; unsigned long long retval; bool fgraph_retval_supported = true; const char *func; int ret; int i; if (tep_get_field_val(s, finfo->fgraph_ret_event, "rettime", ret_rec, &rettime, 1)) return trace_seq_putc(s, '!'); if (tep_get_field_val(s, finfo->fgraph_ret_event, "calltime", ret_rec, &calltime, 1)) return trace_seq_putc(s, '!'); if (!tep_find_field(finfo->fgraph_ret_event, "retval")) { fgraph_retval_supported = false; } else { if (tep_get_field_val(s, finfo->fgraph_ret_event, "retval", ret_rec, &retval, 1)) return trace_seq_putc(s, '!'); } duration = rettime - calltime; /* Overhead */ print_graph_overhead(s, duration); /* Duration */ print_graph_duration(s, duration); if (tep_get_field_val(s, event, "depth", record, &depth, 1)) return trace_seq_putc(s, '!'); /* Function */ for (i = 0; i < (int)(depth * TRACE_GRAPH_INDENT); i++) 
trace_seq_putc(s, ' '); if (tep_get_field_val(s, event, "func", record, &val, 1)) return trace_seq_putc(s, '!'); func = tep_find_function(pevent, val); if (func) ret = trace_seq_printf(s, "%s();", func); else ret = trace_seq_printf(s, "%llx();", val); if (ret && fgraph_depth->set) ret = trace_seq_printf(s, " (%lld)", depth); /* Return Value */ if (ret && fgraph_retval_supported && !fgraph_retval_skip->set) { if (fgraph_retval_dec->set) { ret = trace_seq_printf(s, " (ret=%lld)", retval); } else if (fgraph_retval_hex->set) { ret = trace_seq_printf(s, " (ret=0x%llx)", retval); } else { /* Error codes are in decimal; others are in hex */ if (!IS_LINUX_ERR_VALUE(retval)) ret = trace_seq_printf(s, " (ret=0x%llx)", retval); else ret = trace_seq_printf(s, " (ret=%lld)", retval); } } return ret; } static int print_graph_nested(struct trace_seq *s, struct tep_event *event, struct tep_record *record) { struct tep_handle *pevent = event->tep; unsigned long long depth; unsigned long long val; const char *func; int ret; int i; /* No overhead */ print_graph_overhead(s, -1); /* No time */ trace_seq_puts(s, " | "); if (tep_get_field_val(s, event, "depth", record, &depth, 1)) return trace_seq_putc(s, '!'); /* Function */ for (i = 0; i < (int)(depth * TRACE_GRAPH_INDENT); i++) trace_seq_putc(s, ' '); if (tep_get_field_val(s, event, "func", record, &val, 1)) return trace_seq_putc(s, '!'); func = tep_find_function(pevent, val); if (func) ret = trace_seq_printf(s, "%s() {", func); else ret = trace_seq_printf(s, "%llx() {", val); if (ret && fgraph_depth->set) ret = trace_seq_printf(s, " (%lld)", depth); return ret; } static int fgraph_ent_handler(struct trace_seq *s, struct tep_record *record, struct tep_event *event, void *context) { struct tracecmd_ftrace *finfo = context; struct tep_record *rec; unsigned long long val, pid; int cpu; ret_event_check(finfo, event->tep); if (tep_get_common_field_val(s, event, "common_pid", record, &pid, 1)) return trace_seq_putc(s, '!'); if 
(tep_get_field_val(s, event, "func", record, &val, 1)) return trace_seq_putc(s, '!'); rec = tracecmd_peek_next_data(tracecmd_curr_thread_handle, &cpu); /* * If the next event is on another CPU, show it. * Even if the next event is the return of this function. */ if (cpu != record->cpu) rec = NULL; if (rec) rec = get_return_for_leaf(s, cpu, pid, val, rec, finfo); if (rec) { /* * If this is a leaf function, then get_return_for_leaf * returns the return of the function */ print_graph_entry_leaf(s, event, record, rec, finfo); tracecmd_free_record(rec); } else print_graph_nested(s, event, record); return 0; } static int fgraph_ret_handler(struct trace_seq *s, struct tep_record *record, struct tep_event *event, void *context) { struct tracecmd_ftrace *finfo = context; unsigned long long rettime, calltime; unsigned long long duration, depth; unsigned long long val; const char *func; unsigned long long retval; bool fgraph_retval_supported = true; int i; ret_event_check(finfo, event->tep); if (tep_get_field_val(s, event, "rettime", record, &rettime, 1)) return trace_seq_putc(s, '!'); if (tep_get_field_val(s, event, "calltime", record, &calltime, 1)) return trace_seq_putc(s, '!'); if (!tep_find_field(event, "retval")) { fgraph_retval_supported = false; } else { if (tep_get_field_val(s, event, "retval", record, &retval, 1)) return trace_seq_putc(s, '!'); } duration = rettime - calltime; /* Overhead */ print_graph_overhead(s, duration); /* Duration */ print_graph_duration(s, duration); if (tep_get_field_val(s, event, "depth", record, &depth, 1)) return trace_seq_putc(s, '!'); /* Function */ for (i = 0; i < (int)(depth * TRACE_GRAPH_INDENT); i++) trace_seq_putc(s, ' '); trace_seq_putc(s, '}'); if (fgraph_tail->set) { if (tep_get_field_val(s, event, "func", record, &val, 0)) return 0; func = tep_find_function(event->tep, val); if (!func) return 0; trace_seq_printf(s, " /* %s */", func); } if (fgraph_depth->set) trace_seq_printf(s, " (%lld)", depth); /* Return Value */ if 
(fgraph_retval_supported && !fgraph_retval_skip->set) { if (fgraph_retval_dec->set) { trace_seq_printf(s, " (ret=%lld)", retval); } else if (fgraph_retval_hex->set) { trace_seq_printf(s, " (ret=0x%llx)", retval); } else { /* Error codes are in decimal; others are in hex */ if (!IS_LINUX_ERR_VALUE(retval)) trace_seq_printf(s, " (ret=0x%llx)", retval); else trace_seq_printf(s, " (ret=%lld)", retval); } } return 0; } /** * tracecmd_ftrace_load_options - load the ftrace options * * This routine is used for trace-cmd list, to load the builtin * ftrace options in order to list them. As the list command does * not load a trace.dat file where this would normally be loaded. */ void tracecmd_ftrace_load_options(void) { tep_plugin_add_options("ftrace", trace_ftrace_options); } int tracecmd_ftrace_overrides(struct tracecmd_input *handle, struct tracecmd_ftrace *finfo) { struct tep_handle *pevent; struct tep_event *event; finfo->handle = handle; pevent = tracecmd_get_tep(handle); tep_register_event_handler(pevent, -1, "ftrace", "function", function_handler, NULL); tep_register_event_handler(pevent, -1, "ftrace", "funcgraph_entry", fgraph_ent_handler, finfo); tep_register_event_handler(pevent, -1, "ftrace", "funcgraph_exit", fgraph_ret_handler, finfo); tep_plugin_add_options("ftrace", trace_ftrace_options); /* Store the func ret id and event for later use */ event = tep_find_event_by_name(pevent, "ftrace", "funcgraph_exit"); if (!event) return 0; finfo->long_size = tracecmd_long_size(handle); finfo->fgraph_ret_id = event->id; finfo->fgraph_ret_event = event; return 0; } trace-cmd-v3.3.1/lib/trace-cmd/trace-hash.c000066400000000000000000000033221470231550600203360ustar00rootroot00000000000000// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2014, Steven Rostedt * */ #include #include #include #include #include #include "trace-cmd-private.h" #include "trace-hash.h" int __hidden trace_hash_init(struct trace_hash *hash, int buckets) { memset(hash, 0, sizeof(*hash)); 
hash->buckets = calloc(sizeof(*hash->buckets), buckets); if (!hash->buckets) return -ENOMEM; hash->nr_buckets = buckets; /* If a power of two then we can shortcut */ if (!(buckets & (buckets - 1))) hash->power = buckets - 1; return 0; } void __hidden trace_hash_free(struct trace_hash *hash) { free(hash->buckets); } int __hidden trace_hash_empty(struct trace_hash *hash) { struct trace_hash_item **bucket; trace_hash_for_each_bucket(bucket, hash) if (*bucket) return 0; return 1; } int __hidden trace_hash_add(struct trace_hash *hash, struct trace_hash_item *item) { struct trace_hash_item *next; int bucket = hash->power ? item->key & hash->power : item->key % hash->nr_buckets; if (hash->buckets[bucket]) { next = hash->buckets[bucket]; next->prev = item; } else next = NULL; item->next = next; item->prev = (struct trace_hash_item *)&hash->buckets[bucket]; hash->buckets[bucket] = item; return 1; } __hidden struct trace_hash_item * trace_hash_find(struct trace_hash *hash, unsigned long long key, trace_hash_func match, void *data) { struct trace_hash_item *item; int bucket = hash->power ? 
key & hash->power : key % hash->nr_buckets; for (item = hash->buckets[bucket]; item; item = item->next) { if (item->key == key) { if (!match) return item; if (match(item, data)) return item; } } return NULL; } trace-cmd-v3.3.1/lib/trace-cmd/trace-hooks.c000066400000000000000000000062221470231550600205400ustar00rootroot00000000000000// SPDX-License-Identifier: LGPL-2.1 /* * Copyright (C) 2015 Red Hat Inc, Steven Rostedt * */ #include #include #include #include "trace-cmd-private.h" #include "trace-cmd-local.h" #include "event-utils.h" struct hook_list *tracecmd_create_event_hook(const char *arg) { struct hook_list *hook; char *system = NULL; char *event; char *match; char *flags = NULL; char *pid = NULL; char *str; char *tok; int index; int ch; int i; hook = malloc(sizeof(*hook)); if (!hook) return NULL; memset(hook, 0, sizeof(*hook)); str = strdup(arg); if (!str) { free(hook); return NULL; } hook->str = str; hook->hook = arg; /* * Hooks are in the form of: * [:],[,]/ * [:],[,] * * Where start_system, start_pid, end_system, and flags are all * optional. * * Flags are (case insensitive): * P - pinned to cpu (wont migrate) * G - global, not hooked to task - currently ignored. * S - save stacks for this event. 
*/ tok = strtok(str, ":,"); if (!tok) goto invalid_tok; /* See what the token was from the original arg */ index = strlen(tok); if (arg[index] == ':') { /* this is a system, the next token must be ',' */ system = tok; tok = strtok(NULL, ","); if (!tok) goto invalid_tok; } event = tok; tok = strtok(NULL, ",/"); if (!tok) goto invalid_tok; match = tok; index = strlen(tok) + tok - str; if (arg[index] == ',') { tok = strtok(NULL, "/"); if (!tok) goto invalid_tok; pid = tok; } hook->start_system = system; hook->start_event = event; hook->start_match = match; hook->pid = pid; /* Now process the end event */ system = NULL; tok = strtok(NULL, ":,"); if (!tok) goto invalid_tok; /* See what the token was from the original arg */ index = tok - str + strlen(tok); if (arg[index] == ':') { /* this is a system, the next token must be ',' */ system = tok; tok = strtok(NULL, ","); if (!tok) goto invalid_tok; } event = tok; tok = strtok(NULL, ","); if (!tok) goto invalid_tok; match = tok; index = strlen(tok) + tok - str; if (arg[index] == ',') { tok = strtok(NULL, ""); if (!tok) goto invalid_tok; flags = tok; } hook->end_system = system; hook->end_event = event; hook->end_match = match; hook->migrate = 1; if (flags) { for (i = 0; flags[i]; i++) { ch = tolower(flags[i]); switch (ch) { case 'p': hook->migrate = 0; break; case 'g': hook->global = 1; break; case 's': hook->stack = 1; break; default: tracecmd_warning("unknown flag %c", flags[i]); } } } printf("start %s:%s:%s (%s) end %s:%s:%s (%s)\n", hook->start_system, hook->start_event, hook->start_match, hook->pid, hook->end_system, hook->end_event, hook->end_match, flags); return hook; invalid_tok: tracecmd_warning("Invalid hook format '%s'", arg); return NULL; } void tracecmd_free_hooks(struct hook_list *hooks) { struct hook_list *hook; while (hooks) { hook = hooks; hooks = hooks->next; free(hook->str); free(hook); } } 
trace-cmd-v3.3.1/lib/trace-cmd/trace-input.c000066400000000000000000004717231470231550600205700ustar00rootroot00000000000000// SPDX-License-Identifier: LGPL-2.1 /* * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt * */ #include #include #include #include #include #include #include #include #include #include "trace-write-local.h" #include "trace-cmd-local.h" #include "trace-rbtree.h" #include "trace-local.h" #include "kbuffer.h" #include "list.h" #define _STRINGIFY(x) #x #define STRINGIFY(x) _STRINGIFY(x) #define MISSING_EVENTS (1 << 31) #define MISSING_STORED (1 << 30) #define COMMIT_MASK ((1 << 27) - 1) /* force uncompressing in memory */ #define INMEMORY_DECOMPRESS /* for debugging read instead of mmap */ static int force_read = 0; struct page_map { struct list_head list; off_t offset; off_t size; void *map; int ref_count; }; struct follow_event { struct tep_event *event; void *callback_data; int (*callback)(struct tracecmd_input *handle, struct tep_event *, struct tep_record *, int, void *); }; struct page { struct list_head list; off_t offset; struct tracecmd_input *handle; struct page_map *page_map; void *map; int ref_count; int cpu; long long lost_events; #if DEBUG_RECORD struct tep_record *records; #endif }; struct zchunk_cache { struct trace_rbtree_node node; struct tracecmd_compress_chunk *chunk; void *map; int ref; }; struct cpu_zdata { /* uncompressed cpu data */ int fd; char file[26]; /* strlen(COMPR_TEMP_FILE) */ unsigned int count; unsigned int last_chunk; struct trace_rbtree cache; struct tracecmd_compress_chunk *chunks; }; #define COMPR_TEMP_FILE "/tmp/trace_cpu_dataXXXXXX" struct cpu_data { /* the first two never change */ unsigned long long file_offset; unsigned long long file_size; unsigned long long offset; unsigned long long size; unsigned long long timestamp; unsigned long long first_ts; struct list_head page_maps; struct page_map *page_map; struct page **pages; struct tep_record *next; struct page *page; struct kbuffer *kbuf; int 
nr_pages; int page_cnt; int cpu; int pipe_fd; struct cpu_zdata compress; }; struct cpu_file_data { int cpu; unsigned long long offset; unsigned long long size; }; struct input_buffer_instance { char *name; size_t offset; char *clock; bool latency; int page_size; int cpus; struct cpu_file_data *cpu_data; }; struct ts_offset_sample { long long time; long long offset; long long scaling; long long fraction; }; struct guest_trace_info { struct guest_trace_info *next; char *name; unsigned long long trace_id; int vcpu_count; int *cpu_pid; }; struct timesync_offsets { int ts_samples_count; struct ts_offset_sample *ts_samples; }; struct host_trace_info { unsigned long long peer_trace_id; unsigned int flags; bool sync_enable; int ts_samples_count; struct ts_offset_sample *ts_samples; int cpu_count; struct timesync_offsets *ts_offsets; }; struct tsc2nsec { int mult; int shift; unsigned long long offset; }; struct file_section { unsigned long long section_offset; unsigned long long data_offset; int id; int flags; struct file_section *next; }; struct tracecmd_input { struct tep_handle *pevent; struct tep_plugin_list *plugin_list; struct tracecmd_input *parent; struct tracecmd_filter *filter; struct follow_event *followers; struct follow_event *missed_followers; struct tracecmd_cpu_map *map; unsigned long file_state; unsigned long long trace_id; unsigned long long next_offset; unsigned long flags; int fd; int long_size; int page_size; int page_map_size; int max_cpu; int cpus; int start_cpu; int ref; int nr_followers; int nr_missed_followers; int nr_buffers; /* buffer instances */ bool use_trace_clock; bool read_page; bool use_pipe; bool read_zpage; /* uncompress pages in memory, do not use tmp files */ bool cpu_compressed; int file_version; int map_cnt; unsigned int cpustats_size; struct cpu_zdata latz; struct cpu_data *cpu_data; long long ts_offset; struct tsc2nsec tsc_calc; unsigned int strings_size; /* size of the metadata strings */ char *strings; /* metadata strings */ bool 
read_compress; struct tracecmd_compression *compress; struct host_trace_info host; double ts2secs; char * cpustats; char * uname; char * version; char * trace_clock; struct input_buffer_instance top_buffer; struct input_buffer_instance *buffers; int parsing_failures; struct guest_trace_info *guest; struct tracecmd_ftrace finfo; struct hook_list *hooks; struct pid_addr_maps *pid_maps; /* file information */ struct file_section *sections; bool options_init; unsigned long long options_start; unsigned long long options_last_offset; size_t total_file_size; /* For custom profilers. */ tracecmd_show_data_func show_data_func; void *private; }; __thread struct tracecmd_input *tracecmd_curr_thread_handle; #define CHECK_READ_STATE(H, S) ((H)->file_version < FILE_VERSION_SECTIONS && (H)->file_state >= (S)) #define HAS_SECTIONS(H) ((H)->flags & TRACECMD_FL_SECTIONED) #define HAS_COMPRESSION(H) ((H)->flags & TRACECMD_FL_COMPRESSION) static int read_options_type(struct tracecmd_input *handle); void tracecmd_set_flag(struct tracecmd_input *handle, int flag) { handle->flags |= flag; } void tracecmd_clear_flag(struct tracecmd_input *handle, int flag) { handle->flags &= ~flag; } unsigned long tracecmd_get_flags(struct tracecmd_input *handle) { return handle->flags; } enum tracecmd_file_states tracecmd_get_file_state(struct tracecmd_input *handle) { return handle->file_state; } void tracecmd_set_private(struct tracecmd_input *handle, void *data) { handle->private = data; } void *tracecmd_get_private(struct tracecmd_input *handle) { return handle->private; } #if DEBUG_RECORD static void remove_record(struct page *page, struct tep_record *record) { if (record->prev) record->prev->next = record->next; else page->records = record->next; if (record->next) record->next->prev = record->prev; } static void add_record(struct page *page, struct tep_record *record) { if (page->records) page->records->prev = record; record->next = page->records; record->prev = NULL; page->records = record; } 
static const char *show_records(struct page **pages, int nr_pages) { static char buf[BUFSIZ + 1]; struct tep_record *record; struct page *page; int len; int i; memset(buf, 0, sizeof(buf)); len = 0; for (i = 0; i < nr_pages; i++) { page = pages[i]; if (!page) continue; for (record = page->records; record; record = record->next) { int n; n = snprintf(buf+len, BUFSIZ - len, " 0x%lx", record->alloc_addr); len += n; if (len >= BUFSIZ) break; } } return buf; } #else static inline void remove_record(struct page *page, struct tep_record *record) {} static inline void add_record(struct page *page, struct tep_record *record) {} static const char *show_records(struct page **pages, int nr_pages) { return ""; } #endif /** * trace_set_guest_map - set map to input handle * @handle: The handle to set the cpu map to * @map: The cpu map for this handle (to the host) * * Assign the mapping of host to guest for a guest handle. */ __hidden void trace_set_guest_map(struct tracecmd_input *handle, struct tracecmd_cpu_map *map) { handle->map = map; } __hidden struct tracecmd_cpu_map *trace_get_guest_map(struct tracecmd_input *handle) { return handle->map; } __hidden void trace_set_guest_map_cnt(struct tracecmd_input *handle, int count) { handle->map_cnt = count; } __hidden int trace_get_guest_map_cnt(struct tracecmd_input *handle) { return handle->map_cnt; } static int init_cpu(struct tracecmd_input *handle, int cpu); static ssize_t do_read_fd(int fd, void *data, size_t size) { ssize_t tot = 0; ssize_t r; do { r = read(fd, data + tot, size - tot); tot += r; if (!r) break; if (r < 0) return r; } while (tot != size); return tot; } static inline int do_lseek(struct tracecmd_input *handle, int offset, int whence) { if (handle->read_compress) return tracecmd_compress_lseek(handle->compress, offset, whence); else return lseek(handle->fd, offset, whence); } static inline ssize_t do_read(struct tracecmd_input *handle, void *data, size_t size) { if (handle->read_compress) return 
tracecmd_compress_buffer_read(handle->compress, data, size); else return do_read_fd(handle->fd, data, size); } static ssize_t do_read_check(struct tracecmd_input *handle, void *data, size_t size) { ssize_t ret; ret = do_read(handle, data, size); if (ret < 0) return ret; if (ret != size) return -1; return 0; } static char *read_string(struct tracecmd_input *handle) { char buf[BUFSIZ]; char *str = NULL; size_t size = 0; ssize_t i; ssize_t r; for (;;) { r = do_read(handle, buf, BUFSIZ); if (r <= 0) goto fail; for (i = 0; i < r; i++) { if (!buf[i]) break; } if (i < r) break; if (str) { size += BUFSIZ; str = realloc(str, size); if (!str) return NULL; memcpy(str + (size - BUFSIZ), buf, BUFSIZ); } else { size = BUFSIZ; str = malloc(size); if (!str) return NULL; memcpy(str, buf, size); } } /* move the file descriptor to the end of the string */ r = do_lseek(handle, -(r - (i+1)), SEEK_CUR); if (r < 0) goto fail; if (str) { size += i + 1; str = realloc(str, size); if (!str) return NULL; memcpy(str + (size - i), buf, i + 1); } else { size = i + 1; str = malloc(size); if (!str) return NULL; memcpy(str, buf, i + 1); } return str; fail: if (str) free(str); return NULL; } static int read2(struct tracecmd_input *handle, unsigned short *size) { struct tep_handle *pevent = handle->pevent; unsigned short data; if (do_read_check(handle, &data, 2)) return -1; *size = tep_read_number(pevent, &data, 2); return 0; } static int read4(struct tracecmd_input *handle, unsigned int *size) { struct tep_handle *pevent = handle->pevent; unsigned int data; if (do_read_check(handle, &data, 4)) return -1; *size = tep_read_number(pevent, &data, 4); return 0; } static int read8(struct tracecmd_input *handle, unsigned long long *size) { struct tep_handle *pevent = handle->pevent; unsigned long long data; if (do_read_check(handle, &data, 8)) return -1; *size = tep_read_number(pevent, &data, 8); return 0; } __hidden void in_uncompress_reset(struct tracecmd_input *handle) { if (handle->compress) { 
handle->read_compress = false; tracecmd_compress_reset(handle->compress); } } __hidden int in_uncompress_block(struct tracecmd_input *handle) { int ret = 0; if (handle->compress) { ret = tracecmd_uncompress_block(handle->compress); if (!ret) handle->read_compress = true; } return ret; } static struct file_section *section_get(struct tracecmd_input *handle, int id) { struct file_section *sec; for (sec = handle->sections; sec; sec = sec->next) { if (sec->id == id) return sec; } return NULL; } static struct file_section *section_open(struct tracecmd_input *handle, int id) { struct file_section *sec = section_get(handle, id); if (!sec) return NULL; if (lseek(handle->fd, sec->data_offset, SEEK_SET) == (off_t)-1) return NULL; if ((sec->flags & TRACECMD_SEC_FL_COMPRESS) && in_uncompress_block(handle)) return NULL; return sec; } static void section_close(struct tracecmd_input *handle, struct file_section *sec) { if (sec->flags & TRACECMD_SEC_FL_COMPRESS) in_uncompress_reset(handle); } static int section_add_or_update(struct tracecmd_input *handle, int id, int flags, unsigned long long section_offset, unsigned long long data_offset) { struct file_section *sec = section_get(handle, id); if (!sec) { sec = calloc(1, sizeof(struct file_section)); if (!sec) return -1; sec->next = handle->sections; handle->sections = sec; sec->id = id; } if (section_offset) sec->section_offset = section_offset; if (data_offset) sec->data_offset = data_offset; if (flags >= 0) sec->flags = flags; return 0; } static int read_header_files(struct tracecmd_input *handle) { struct tep_handle *pevent = handle->pevent; unsigned long long size; char *header; char buf[BUFSIZ]; if (CHECK_READ_STATE(handle, TRACECMD_FILE_HEADERS)) return 0; if (!HAS_SECTIONS(handle)) section_add_or_update(handle, TRACECMD_OPTION_HEADER_INFO, 0, 0, lseek(handle->fd, 0, SEEK_CUR)); if (do_read_check(handle, buf, 12)) return -1; if (memcmp(buf, "header_page", 12) != 0) return -1; if (read8(handle, &size) < 0) return -1; header = 
malloc(size); if (!header) return -1; if (do_read_check(handle, header, size)) goto failed_read; tep_parse_header_page(pevent, header, size, handle->long_size); free(header); /* * The size field in the page is of type long, * use that instead, since it represents the kernel. */ handle->long_size = tep_get_header_page_size(pevent); if (do_read_check(handle, buf, 13)) return -1; if (memcmp(buf, "header_event", 13) != 0) return -1; if (read8(handle, &size) < 0) return -1; header = malloc(size); if (!header) return -1; if (do_read_check(handle, header, size)) goto failed_read; free(header); handle->file_state = TRACECMD_FILE_HEADERS; return 0; failed_read: free(header); return -1; } static int regex_event_buf(const char *file, int size, regex_t *epreg) { char *buf; char *line; int ret; buf = malloc(size + 1); if (!buf) { tracecmd_warning("Insufficient memory"); return 0; } strncpy(buf, file, size); buf[size] = 0; /* get the name from the first line */ line = strtok(buf, "\n"); if (!line) { tracecmd_warning("No newline found in '%s'", buf); free(buf); return 0; } /* skip name if it is there */ if (strncmp(line, "name: ", 6) == 0) line += 6; ret = regexec(epreg, line, 0, NULL, 0) == 0; free(buf); return ret; } static int read_ftrace_file(struct tracecmd_input *handle, unsigned long long size, int print, regex_t *epreg) { struct tep_handle *pevent = handle->pevent; char *buf; buf = malloc(size); if (!buf) return -1; if (do_read_check(handle, buf, size)) { free(buf); return -1; } if (epreg) { if (print || regex_event_buf(buf, size, epreg)) printf("%.*s\n", (int)size, buf); } else { if (tep_parse_event(pevent, buf, size, "ftrace")) handle->parsing_failures++; } free(buf); return 0; } static int read_event_file(struct tracecmd_input *handle, char *system, unsigned long long size, int print, int *sys_printed, regex_t *epreg) { struct tep_handle *pevent = handle->pevent; char *buf; buf = malloc(size); if (!buf) return -1; if (do_read_check(handle, buf, size)) { free(buf); 
return -1; } if (epreg) { if (print || regex_event_buf(buf, size, epreg)) { if (!*sys_printed) { printf("\nsystem: %s\n", system); *sys_printed = 1; } printf("%.*s\n", (int)size, buf); } } else { if (tep_parse_event(pevent, buf, size, system)) handle->parsing_failures++; } free(buf); return 0; } static int make_preg_files(const char *regex, regex_t *system, regex_t *event, int *unique) { char *buf; char *sstr; char *estr; int ret; /* unique is set if a colon is found */ *unique = 0; /* split "system:event" into "system" and "event" */ buf = strdup(regex); if (!buf) return -ENOMEM; sstr = strtok(buf, ":"); estr = strtok(NULL, ":"); /* If no colon is found, set event == system */ if (!estr) estr = sstr; else *unique = 1; ret = regcomp(system, sstr, REG_ICASE|REG_NOSUB); if (ret) { tracecmd_warning("Bad regular expression '%s'", sstr); goto out; } ret = regcomp(event, estr, REG_ICASE|REG_NOSUB); if (ret) { tracecmd_warning("Bad regular expression '%s'", estr); goto out; } out: free(buf); return ret; } static int read_ftrace_files(struct tracecmd_input *handle, const char *regex) { unsigned long long size; regex_t spreg; regex_t epreg; regex_t *sreg = NULL; regex_t *ereg = NULL; unsigned int count, i; int print_all = 0; int unique; int ret; if (CHECK_READ_STATE(handle, TRACECMD_FILE_FTRACE_EVENTS)) return 0; if (!HAS_SECTIONS(handle)) section_add_or_update(handle, TRACECMD_OPTION_FTRACE_EVENTS, 0, 0, lseek(handle->fd, 0, SEEK_CUR)); if (regex) { sreg = &spreg; ereg = &epreg; ret = make_preg_files(regex, sreg, ereg, &unique); if (ret) return -1; if (regexec(sreg, "ftrace", 0, NULL, 0) == 0) { /* * If the system matches a regex that did * not contain a colon, then print all events. */ if (!unique) print_all = 1; } else if (unique) { /* * The user specified a unique event that did * not match the ftrace system. Don't print any * events here. 
*/ regfree(sreg); regfree(ereg); sreg = NULL; ereg = NULL; } } ret = read4(handle, &count); if (ret < 0) goto out; for (i = 0; i < count; i++) { ret = read8(handle, &size); if (ret < 0) goto out; ret = read_ftrace_file(handle, size, print_all, ereg); if (ret < 0) goto out; } handle->file_state = TRACECMD_FILE_FTRACE_EVENTS; ret = 0; out: if (sreg) { regfree(sreg); regfree(ereg); } return ret; } static int read_event_files(struct tracecmd_input *handle, const char *regex) { unsigned long long size; char *system = NULL; regex_t spreg; regex_t epreg; regex_t *sreg = NULL; regex_t *ereg = NULL; regex_t *reg; unsigned int systems; unsigned int count; unsigned int i, x; int print_all; int sys_printed; int unique; int ret; if (CHECK_READ_STATE(handle, TRACECMD_FILE_ALL_EVENTS)) return 0; if (!HAS_SECTIONS(handle)) section_add_or_update(handle, TRACECMD_OPTION_EVENT_FORMATS, 0, 0, lseek(handle->fd, 0, SEEK_CUR)); if (regex) { sreg = &spreg; ereg = &epreg; ret = make_preg_files(regex, sreg, ereg, &unique); if (ret) return -1; } ret = read4(handle, &systems); if (ret < 0) goto out; for (i = 0; i < systems; i++) { system = read_string(handle); if (!system) { ret = -1; goto out; } sys_printed = 0; print_all = 0; reg = ereg; if (sreg) { if (regexec(sreg, system, 0, NULL, 0) == 0) { /* * If the user passed in a regex that * did not contain a colon, then we can * print all the events of this system. */ if (!unique) print_all = 1; } else if (unique) { /* * The user passed in a unique event that * specified a specific system and event. * Since this system doesn't match this * event, then we don't print any events * for this system. 
*/ reg = NULL; } } ret = read4(handle, &count); if (ret < 0) goto out; for (x=0; x < count; x++) { ret = read8(handle, &size); if (ret < 0) goto out; ret = read_event_file(handle, system, size, print_all, &sys_printed, reg); if (ret < 0) goto out; } free(system); } system = NULL; handle->file_state = TRACECMD_FILE_ALL_EVENTS; ret = 0; out: if (sreg) { regfree(sreg); regfree(ereg); } free(system); return ret; } static int read_proc_kallsyms(struct tracecmd_input *handle) { struct tep_handle *tep = handle->pevent; unsigned int size; char *buf; if (CHECK_READ_STATE(handle, TRACECMD_FILE_KALLSYMS)) return 0; if (!HAS_SECTIONS(handle)) section_add_or_update(handle, TRACECMD_OPTION_KALLSYMS, 0, 0, lseek(handle->fd, 0, SEEK_CUR)); if (read4(handle, &size) < 0) return -1; if (!size) { handle->file_state = TRACECMD_FILE_KALLSYMS; return 0; /* OK? */ } buf = malloc(size+1); if (!buf) return -1; if (do_read_check(handle, buf, size)){ free(buf); return -1; } buf[size] = 0; tep_parse_kallsyms(tep, buf); free(buf); handle->file_state = TRACECMD_FILE_KALLSYMS; return 0; } static int read_ftrace_printk(struct tracecmd_input *handle) { unsigned int size; char *buf; if (CHECK_READ_STATE(handle, TRACECMD_FILE_PRINTK)) return 0; if (!HAS_SECTIONS(handle)) section_add_or_update(handle, TRACECMD_OPTION_PRINTK, 0, 0, lseek(handle->fd, 0, SEEK_CUR)); if (read4(handle, &size) < 0) return -1; if (!size) { handle->file_state = TRACECMD_FILE_PRINTK; return 0; /* OK? 
*/ } buf = malloc(size + 1); if (!buf) return -1; if (do_read_check(handle, buf, size)) { free(buf); return -1; } buf[size] = 0; tep_parse_printk_formats(handle->pevent, buf); free(buf); handle->file_state = TRACECMD_FILE_PRINTK; return 0; } static int read_and_parse_cmdlines(struct tracecmd_input *handle); /** * tracecmd_get_parsing_failures - get the count of parsing failures * @handle: input handle for the trace.dat file * * This returns the count of failures while parsing the event files */ int tracecmd_get_parsing_failures(struct tracecmd_input *handle) { if (handle) return handle->parsing_failures; return 0; } static int read_cpus(struct tracecmd_input *handle) { unsigned int cpus; if (CHECK_READ_STATE(handle, TRACECMD_FILE_CPU_COUNT)) return 0; if (read4(handle, &cpus) < 0) return -1; handle->cpus = cpus; handle->max_cpu = cpus; tep_set_cpus(handle->pevent, handle->cpus); handle->file_state = TRACECMD_FILE_CPU_COUNT; return 0; } static int read_headers_v6(struct tracecmd_input *handle, enum tracecmd_file_states state, const char *regex) { int ret; /* Set to read all if state is zero */ if (!state) state = TRACECMD_FILE_OPTIONS; if (state <= handle->file_state) return 0; handle->parsing_failures = 0; ret = read_header_files(handle); if (ret < 0) return -1; if (state <= handle->file_state) return 0; ret = read_ftrace_files(handle, NULL); if (ret < 0) return -1; if (state <= handle->file_state) return 0; ret = read_event_files(handle, regex); if (ret < 0) return -1; if (state <= handle->file_state) return 0; ret = read_proc_kallsyms(handle); if (ret < 0) return -1; if (state <= handle->file_state) return 0; ret = read_ftrace_printk(handle); if (ret < 0) return -1; if (state <= handle->file_state) return 0; if (read_and_parse_cmdlines(handle) < 0) return -1; if (state <= handle->file_state) return 0; if (read_cpus(handle) < 0) return -1; if (state <= handle->file_state) return 0; if (read_options_type(handle) < 0) return -1; return 0; } static int 
handle_options(struct tracecmd_input *handle); static const char *get_metadata_string(struct tracecmd_input *handle, int offset) { if (!handle || !handle->strings || offset < 0 || handle->strings_size >= offset) return NULL; return handle->strings + offset; } static int read_section_header(struct tracecmd_input *handle, unsigned short *id, unsigned short *flags, unsigned long long *size, const char **description) { unsigned short fl; unsigned short sec_id; unsigned long long sz; int desc; if (read2(handle, &sec_id)) return -1; if (read2(handle, &fl)) return -1; if (read4(handle, (unsigned int *)&desc)) return -1; if (read8(handle, &sz)) return -1; if (id) *id = sec_id; if (flags) *flags = fl; if (size) *size = sz; if (description) *description = get_metadata_string(handle, desc); return 0; } static int handle_section(struct tracecmd_input *handle, struct file_section *section, const char *regex) { unsigned short id, flags; unsigned long long size; int ret; if (lseek(handle->fd, section->section_offset, SEEK_SET) == (off_t)-1) return -1; if (read_section_header(handle, &id, &flags, &size, NULL)) return -1; section->flags = flags; if (id != section->id) return -1; section->data_offset = lseek(handle->fd, 0, SEEK_CUR); if ((section->flags & TRACECMD_SEC_FL_COMPRESS) && in_uncompress_block(handle)) return -1; switch (section->id) { case TRACECMD_OPTION_HEADER_INFO: ret = read_header_files(handle); break; case TRACECMD_OPTION_FTRACE_EVENTS: ret = read_ftrace_files(handle, NULL); break; case TRACECMD_OPTION_EVENT_FORMATS: ret = read_event_files(handle, regex); break; case TRACECMD_OPTION_KALLSYMS: ret = read_proc_kallsyms(handle); break; case TRACECMD_OPTION_PRINTK: ret = read_ftrace_printk(handle); break; case TRACECMD_OPTION_CMDLINES: ret = read_and_parse_cmdlines(handle); break; default: ret = 0; break; } if (section->flags & TRACECMD_SEC_FL_COMPRESS) in_uncompress_reset(handle); return ret; } static int read_headers(struct tracecmd_input *handle, const char *regex) { 
struct file_section *section; if (handle->options_init) return 0; if (!handle->options_start) return -1; if (lseek(handle->fd, handle->options_start, SEEK_SET) == (off_t)-1) { tracecmd_warning("Filed to goto options offset %lld", handle->options_start); return -1; } if (handle_options(handle)) return -1; section = handle->sections; while (section) { if (handle_section(handle, section, NULL)) return -1; section = section->next; } handle->options_init = true; return 0; } /** * tracecmd_read_headers - read the header information from trace.dat * @handle: input handle for the trace.dat file * @state: The state to read up to or zero to read up to options. * * This reads the trace.dat file for various information. Like the * format of the ring buffer, event formats, ftrace formats, kallsyms * and printk. This may be called multiple times with different @state * values, to read partial data at a time. It will always continue * where it left off. */ int tracecmd_read_headers(struct tracecmd_input *handle, enum tracecmd_file_states state) { if (!HAS_SECTIONS(handle)) return read_headers_v6(handle, state, NULL); return read_headers(handle, NULL); } static unsigned long long calc_page_offset(struct tracecmd_input *handle, unsigned long long offset) { return offset & ~(handle->page_size - 1); } static int read_page(struct tracecmd_input *handle, off_t offset, int cpu, void *map) { off_t save_seek; off_t ret; if (handle->use_pipe) { ret = read(handle->cpu_data[cpu].pipe_fd, map, handle->page_size); /* Set EAGAIN if the pipe is empty */ if (ret < 0) { errno = EAGAIN; return -1; } else if (ret == 0) { /* Set EINVAL when the pipe has closed */ errno = EINVAL; return -1; } return 0; } /* other parts of the code may expect the pointer to not move */ save_seek = lseek(handle->fd, 0, SEEK_CUR); ret = lseek(handle->fd, offset, SEEK_SET); if (ret < 0) return -1; ret = read(handle->fd, map, handle->page_size); if (ret < 0) return -1; /* reset the file pointer back */ lseek(handle->fd, 
save_seek, SEEK_SET); return 0; } /* page_map_size must be a power of two */ static unsigned long long normalize_size(unsigned long long size) { /* From Hacker's Delight: or bits after first set bit to all 1s */ size |= (size >> 1); size |= (size >> 2); size |= (size >> 4); size |= (size >> 8); size |= (size >> 16); size |= (size >> 32); /* Clear all bits except first one for previous power of two */ return size - (size >> 1); } static void free_page_map(struct page_map *page_map) { page_map->ref_count--; if (page_map->ref_count) return; munmap(page_map->map, page_map->size); list_del(&page_map->list); free(page_map); } #define CHUNK_CHECK_OFFSET(C, O) ((O) >= (C)->offset && (O) < ((C)->offset + (C)->size)) static int chunk_cmp(const void *A, const void *B) { const struct tracecmd_compress_chunk *a = A; const struct tracecmd_compress_chunk *b = B; if (CHUNK_CHECK_OFFSET(b, a->offset)) return 0; if (a->offset < b->offset) return -1; return 1; } static struct tracecmd_compress_chunk *get_zchunk(struct cpu_data *cpu, off_t offset) { struct cpu_zdata *cpuz = &cpu->compress; struct tracecmd_compress_chunk *chunk; struct tracecmd_compress_chunk key; if (!cpuz->chunks) return NULL; if (offset > (cpuz->chunks[cpuz->count - 1].offset + cpuz->chunks[cpuz->count - 1].size)) return NULL; /* check if the requested offset is in the last requested chunk or in the next chunk */ if (CHUNK_CHECK_OFFSET(cpuz->chunks + cpuz->last_chunk, offset)) return cpuz->chunks + cpuz->last_chunk; cpuz->last_chunk++; if (cpuz->last_chunk < cpuz->count && CHUNK_CHECK_OFFSET(cpuz->chunks + cpuz->last_chunk, offset)) return cpuz->chunks + cpuz->last_chunk; key.offset = offset; chunk = bsearch(&key, cpuz->chunks, cpuz->count, sizeof(*chunk), chunk_cmp); if (!chunk) /* should never happen */ return NULL; cpuz->last_chunk = chunk - cpuz->chunks; return chunk; } static void free_zpage(struct cpu_data *cpu_data, off_t offset) { struct trace_rbtree_node *node; struct zchunk_cache *cache; offset -= 
cpu_data->file_offset; node = trace_rbtree_find(&cpu_data->compress.cache, (void *)&offset); if (!node) return; cache = container_of(node, struct zchunk_cache, node); cache->ref--; if (cache->ref) return; trace_rbtree_delete(&cpu_data->compress.cache, node); free(cache->map); free(cache); } static void *read_zpage(struct tracecmd_input *handle, int cpu, off_t offset) { struct cpu_data *cpu_data = &handle->cpu_data[cpu]; struct tracecmd_compress_chunk *chunk; struct trace_rbtree_node *node; struct zchunk_cache *cache; void *map = NULL; int pindex; int size; offset -= cpu_data->file_offset; /* Look in the cache of already loaded chunks */ node = trace_rbtree_find(&cpu_data->compress.cache, (void *)&offset); if (node) { cache = container_of(node, struct zchunk_cache, node); cache->ref++; goto out; } chunk = get_zchunk(cpu_data, offset); if (!chunk) return NULL; size = handle->page_size > chunk->size ? handle->page_size : chunk->size; map = malloc(size); if (!map) return NULL; if (tracecmd_uncompress_chunk(handle->compress, chunk, map) < 0) goto error; cache = calloc(1, sizeof(struct zchunk_cache)); if (!cache) goto error; cache->ref = 1; cache->chunk = chunk; cache->map = map; trace_rbtree_insert(&cpu_data->compress.cache, &cache->node); /* a chunk can hold multiple pages, get the requested one */ out: pindex = (offset - cache->chunk->offset) / handle->page_size; return cache->map + (pindex * handle->page_size); error: free(map); return NULL; } static void *allocate_page_map(struct tracecmd_input *handle, struct page *page, int cpu, off_t offset) { struct cpu_data *cpu_data = &handle->cpu_data[cpu]; struct page_map *page_map; off_t map_size; off_t map_offset; void *map; int ret; int fd; if (handle->cpu_compressed) { if (handle->read_zpage) return read_zpage(handle, cpu, offset); offset -= cpu_data->file_offset; } if (handle->read_page) { map = malloc(handle->page_size); if (!map) return NULL; ret = read_page(handle, offset, cpu, map); if (ret < 0) { free(map); return 
NULL; } return map; } map_size = handle->page_map_size; map_offset = offset & ~(map_size - 1); if (!handle->cpu_compressed && map_offset < cpu_data->file_offset) { map_size -= cpu_data->file_offset - map_offset; map_offset = cpu_data->file_offset; } page_map = cpu_data->page_map; if (page_map && page_map->offset == map_offset) goto out; list_for_each_entry(page_map, &cpu_data->page_maps, list) { if (page_map->offset == map_offset) goto out; } page_map = calloc(1, sizeof(*page_map)); if (!page_map) return NULL; if (map_offset + map_size > cpu_data->file_offset + cpu_data->file_size) map_size -= map_offset + map_size - (cpu_data->file_offset + cpu_data->file_size); if (cpu_data->compress.fd >= 0) fd = cpu_data->compress.fd; else fd = handle->fd; again: page_map->size = map_size; page_map->offset = map_offset; page_map->map = mmap(NULL, map_size, PROT_READ, MAP_PRIVATE, fd, map_offset); if (page_map->map == MAP_FAILED) { /* Try a smaller map */ map_size >>= 1; if (map_size < handle->page_size) { free(page_map); return NULL; } handle->page_map_size = map_size; map_offset = offset & ~(map_size - 1); /* * Note, it is now possible to get duplicate memory * maps. But that's fine, the previous maps with * larger sizes will eventually be unmapped. 
*/ goto again; } list_add(&page_map->list, &cpu_data->page_maps); out: if (cpu_data->page_map != page_map) { struct page_map *old_map = cpu_data->page_map; cpu_data->page_map = page_map; page_map->ref_count++; if (old_map) free_page_map(old_map); } page->page_map = page_map; page_map->ref_count++; return page_map->map + offset - page_map->offset; } static struct page *allocate_page(struct tracecmd_input *handle, int cpu, off_t offset) { struct cpu_data *cpu_data = &handle->cpu_data[cpu]; struct page **pages; struct page *page; int index; index = (offset - cpu_data->file_offset) / handle->page_size; if (index >= cpu_data->nr_pages) { pages = realloc(cpu_data->pages, (index + 1) * sizeof(*cpu_data->pages)); if (!pages) return NULL; memset(pages + cpu_data->nr_pages, 0, (index + 1 - cpu_data->nr_pages) * sizeof(*cpu_data->pages)); cpu_data->pages = pages; cpu_data->nr_pages = index + 1; } if (cpu_data->pages[index]) { cpu_data->pages[index]->ref_count++; return cpu_data->pages[index]; } page = malloc(sizeof(*page)); if (!page) return NULL; memset(page, 0, sizeof(*page)); page->offset = offset; page->handle = handle; page->cpu = cpu; page->map = allocate_page_map(handle, page, cpu, offset); if (!page->map) { free(page); return NULL; } cpu_data->pages[index] = page; cpu_data->page_cnt++; page->ref_count = 1; return page; } static void __free_page(struct tracecmd_input *handle, struct page *page) { struct cpu_data *cpu_data = &handle->cpu_data[page->cpu]; struct page **pages; int index; if (!page->ref_count) { tracecmd_critical("Page ref count is zero!"); return; } page->ref_count--; if (page->ref_count) return; if (handle->read_page) free(page->map); else if (handle->read_zpage) free_zpage(cpu_data, page->offset); else free_page_map(page->page_map); index = (page->offset - cpu_data->file_offset) / handle->page_size; cpu_data->pages[index] = NULL; cpu_data->page_cnt--; free(page); if (handle->use_pipe) { for (index = cpu_data->nr_pages - 1; index > 0; index--) if 
(cpu_data->pages[index]) break; if (index < (cpu_data->nr_pages - 1)) { pages = realloc(cpu_data->pages, (index + 1) * sizeof(*cpu_data->pages)); if (!pages) return; cpu_data->pages = pages; cpu_data->nr_pages = index + 1; } } } static void free_page(struct tracecmd_input *handle, int cpu) { if (!handle->cpu_data || cpu >= handle->cpus || !handle->cpu_data[cpu].page) return; __free_page(handle, handle->cpu_data[cpu].page); handle->cpu_data[cpu].page = NULL; } static void __free_record(struct tep_record *record) { if (record->priv) { struct page *page = record->priv; remove_record(page, record); __free_page(page->handle, page); } free(record); } void tracecmd_free_record(struct tep_record *record) { if (!record) return; if (!record->ref_count) { tracecmd_critical("record ref count is zero!"); return; } record->ref_count--; if (record->ref_count) return; if (record->locked) { tracecmd_critical("freeing record when it is locked!"); return; } record->data = NULL; __free_record(record); } void tracecmd_record_ref(struct tep_record *record) { record->ref_count++; #if DEBUG_RECORD /* Update locating of last reference */ record->alloc_addr = (unsigned long)__builtin_return_address(0); #endif } static void free_next(struct tracecmd_input *handle, int cpu) { struct tep_record *record; if (!handle->cpu_data || cpu >= handle->cpus) return; record = handle->cpu_data[cpu].next; if (!record) return; handle->cpu_data[cpu].next = NULL; record->locked = 0; tracecmd_free_record(record); } /* This functions was taken from the Linux kernel */ static unsigned long long mul_u64_u32_shr(unsigned long long a, unsigned long long mul, unsigned int shift) { unsigned int ah, al; unsigned long long ret; al = a; ah = a >> 32; ret = (al * mul) >> shift; if (ah) ret += (ah * mul) << (32 - shift); return ret; } static inline unsigned long long timestamp_correction_calc(unsigned long long ts, unsigned int flags, struct ts_offset_sample *min, struct ts_offset_sample *max) { long long tscor; if (flags 
& TRACECMD_TSYNC_FLAG_INTERPOLATE) { long long delta = max->time - min->time; long long offset = ((long long)ts - min->time) * (max->offset - min->offset); tscor = min->offset + (offset + delta / 2) / delta; } else { tscor = min->offset; } ts = (ts * min->scaling) >> min->fraction; if (tscor < 0) return ts - llabs(tscor); return ts + tscor; } static unsigned long long timestamp_host_sync(unsigned long long ts, int cpu, struct tracecmd_input *handle) { struct timesync_offsets *tsync; int min, mid, max; if (cpu >= handle->host.cpu_count) return ts; tsync = &handle->host.ts_offsets[cpu]; /* We have one sample, nothing to calc here */ if (tsync->ts_samples_count == 1) return ts + tsync->ts_samples[0].offset; /* We have two samples, nothing to search here */ if (tsync->ts_samples_count == 2) return timestamp_correction_calc(ts, handle->host.flags, &tsync->ts_samples[0], &tsync->ts_samples[1]); /* We have more than two samples */ if (ts <= tsync->ts_samples[0].time) return timestamp_correction_calc(ts, handle->host.flags, &tsync->ts_samples[0], &tsync->ts_samples[1]); else if (ts >= tsync->ts_samples[tsync->ts_samples_count-1].time) return timestamp_correction_calc(ts, handle->host.flags, &tsync->ts_samples[tsync->ts_samples_count-2], &tsync->ts_samples[tsync->ts_samples_count-1]); min = 0; max = tsync->ts_samples_count-1; mid = (min + max)/2; while (min <= max) { if (ts < tsync->ts_samples[mid].time) max = mid - 1; else if (ts > tsync->ts_samples[mid].time) min = mid + 1; else break; mid = (min + max)/2; } return timestamp_correction_calc(ts, handle->host.flags, &tsync->ts_samples[mid], &tsync->ts_samples[mid+1]); } static unsigned long long timestamp_calc(unsigned long long ts, int cpu, struct tracecmd_input *handle) { /* do not modify raw timestamps */ if (handle->flags & TRACECMD_FL_RAW_TS) return ts; /* Guest trace file, sync with host timestamps */ if (handle->host.sync_enable) ts = timestamp_host_sync(ts, cpu, handle); if (handle->ts2secs) { /* user specified 
clock frequency */ ts *= handle->ts2secs; } else if (handle->tsc_calc.mult) { /* auto calculated TSC clock frequency */ ts = mul_u64_u32_shr(ts, handle->tsc_calc.mult, handle->tsc_calc.shift); } /* User specified time offset with --ts-offset or --date options */ ts += handle->ts_offset; return ts; } /* * Page is mapped, now read in the page header info. */ static int update_page_info(struct tracecmd_input *handle, int cpu) { struct tep_handle *pevent = handle->pevent; void *ptr = handle->cpu_data[cpu].page->map; struct kbuffer *kbuf = handle->cpu_data[cpu].kbuf; /* FIXME: handle header page */ if (tep_get_header_timestamp_size(pevent) != 8) { tracecmd_warning("expected a long long type for timestamp"); return -1; } kbuffer_load_subbuffer(kbuf, ptr); if (kbuffer_subbuffer_size(kbuf) > handle->page_size) { tracecmd_warning("bad page read, with size of %d", kbuffer_subbuffer_size(kbuf)); return -1; } handle->cpu_data[cpu].timestamp = timestamp_calc(kbuffer_timestamp(kbuf), cpu, handle); return 0; } /* * get_page maps a page for a given cpu. 
* * Returns 1 if the page was already mapped, * 0 if it mapped successfully * -1 on error */ static int get_page(struct tracecmd_input *handle, int cpu, off_t offset) { /* Don't map if the page is already where we want */ if (handle->cpu_data[cpu].offset == offset && handle->cpu_data[cpu].page) return 1; /* Do not map no data for CPU */ if (!handle->cpu_data[cpu].size) return -1; if (offset & (handle->page_size - 1)) { errno = -EINVAL; tracecmd_critical("bad page offset %llx", offset); return -1; } if (offset < handle->cpu_data[cpu].file_offset || offset > handle->cpu_data[cpu].file_offset + handle->cpu_data[cpu].file_size) { errno = -EINVAL; tracecmd_critical("bad page offset %llx", offset); return -1; } handle->cpu_data[cpu].offset = offset; handle->cpu_data[cpu].size = (handle->cpu_data[cpu].file_offset + handle->cpu_data[cpu].file_size) - offset; free_page(handle, cpu); handle->cpu_data[cpu].page = allocate_page(handle, cpu, offset); if (!handle->cpu_data[cpu].page) return -1; if (update_page_info(handle, cpu)) return -1; return 0; } static int get_next_page(struct tracecmd_input *handle, int cpu) { off_t offset; if (!handle->cpu_data[cpu].page && !handle->use_pipe) return 0; free_page(handle, cpu); if (handle->cpu_data[cpu].size <= handle->page_size) { handle->cpu_data[cpu].offset = 0; return 0; } offset = handle->cpu_data[cpu].offset + handle->page_size; return get_page(handle, cpu, offset); } static struct tep_record * peek_event(struct tracecmd_input *handle, unsigned long long offset, int cpu) { struct tep_record *record = NULL; /* * Since the timestamp is calculated from the beginning * of the page and through each event, we reset the * page to the beginning. This is just used by * tracecmd_read_at. 
*/ update_page_info(handle, cpu); do { free_next(handle, cpu); record = tracecmd_peek_data(handle, cpu); if (record && (record->offset + record->record_size) > offset) break; } while (record); return record; } static struct tep_record * read_event(struct tracecmd_input *handle, unsigned long long offset, int cpu) { struct tep_record *record; record = peek_event(handle, offset, cpu); if (record) record = tracecmd_read_data(handle, cpu); return record; } static struct tep_record * find_and_peek_event(struct tracecmd_input *handle, unsigned long long offset, int *pcpu) { unsigned long long page_offset; int cpu; /* find the cpu that this offset exists in */ for (cpu = 0; cpu < handle->cpus; cpu++) { if (offset >= handle->cpu_data[cpu].file_offset && offset < handle->cpu_data[cpu].file_offset + handle->cpu_data[cpu].file_size) break; } /* Not found? */ if (cpu == handle->cpus) return NULL; /* Move this cpu index to point to this offest */ page_offset = calc_page_offset(handle, offset); if (get_page(handle, cpu, page_offset) < 0) return NULL; if (pcpu) *pcpu = cpu; return peek_event(handle, offset, cpu); } static struct tep_record * find_and_read_event(struct tracecmd_input *handle, unsigned long long offset, int *pcpu) { struct tep_record *record; int cpu; record = find_and_peek_event(handle, offset, &cpu); if (record) { record = tracecmd_read_data(handle, cpu); if (pcpu) *pcpu = cpu; } return record; } /** * tracecmd_read_at - read a record from a specific offset * @handle: input handle for the trace.dat file * @offset: the offset into the file to find the record * @pcpu: pointer to a variable to store the CPU id the record was found in * * This function is useful when looking for a previous record. * You can store the offset of the record "record->offset" and use that * offset to retreive the record again without needing to store any * other information about the record. * * The record returned must be freed. 
*/ struct tep_record * tracecmd_read_at(struct tracecmd_input *handle, unsigned long long offset, int *pcpu) { unsigned long long page_offset; int cpu; page_offset = calc_page_offset(handle, offset); /* check to see if we have this page already */ for (cpu = 0; cpu < handle->cpus; cpu++) { if (handle->cpu_data[cpu].offset == page_offset && handle->cpu_data[cpu].file_size) break; } if (cpu < handle->cpus && handle->cpu_data[cpu].page) { if (pcpu) *pcpu = cpu; return read_event(handle, offset, cpu); } else return find_and_read_event(handle, offset, pcpu); } /** * tracecmd_refresh_record - remaps the records data * @handle: input handle for the trace.dat file * @record: the record to be refreshed * * A record data points to a mmap section of memory. * by reading new records the mmap section may be unmapped. * This will refresh the record's data mapping. * * ===== OBSOLETED BY PAGE REFERENCES ===== * * Returns 1 if page is still mapped (does not modify CPU iterator) * 0 on successful mapping (was not mapped before, * This will update CPU iterator to point to * the next record) * -1 on error. */ int tracecmd_refresh_record(struct tracecmd_input *handle, struct tep_record *record) { unsigned long long page_offset; int cpu = record->cpu; struct cpu_data *cpu_data = &handle->cpu_data[cpu]; int index; int ret; page_offset = calc_page_offset(handle, record->offset); index = record->offset & (handle->page_size - 1); ret = get_page(handle, record->cpu, page_offset); if (ret < 0) return -1; /* If the page is still mapped, there's nothing to do */ if (ret) return 1; record->data = kbuffer_read_at_offset(cpu_data->kbuf, index, &record->ts); cpu_data->timestamp = record->ts; return 0; } /** * tracecmd_read_cpu_first - get the first record in a CPU * @handle: input handle for the trace.dat file * @cpu: the CPU to search * * This returns the first (by time) record entry in a given CPU. * * The record returned must be freed. 
*/ struct tep_record * tracecmd_read_cpu_first(struct tracecmd_input *handle, int cpu) { unsigned long long page_offset; int ret; if (cpu >= handle->cpus) return NULL; page_offset = calc_page_offset(handle, handle->cpu_data[cpu].file_offset); ret = get_page(handle, cpu, page_offset); if (ret < 0) return NULL; /* If the page was already mapped, we need to reset it */ if (ret) update_page_info(handle, cpu); free_next(handle, cpu); return tracecmd_read_data(handle, cpu); } /** * tracecmd_iterate_reset - Set the handle to iterate from the beginning * @handle: input handle for the trace.dat file * * This causes tracecmd_iterate_events*() to start from the beginning * of the trace.dat file. */ int tracecmd_iterate_reset(struct tracecmd_input *handle) { unsigned long long page_offset; int cpu; int ret = 0; int r; for (cpu = 0; cpu < handle->cpus; cpu++) { page_offset = calc_page_offset(handle, handle->cpu_data[cpu].file_offset); r = get_page(handle, cpu, page_offset); if (r < 0) { ret = -1; continue; /* ?? */ } /* If the page was already mapped, we need to reset it */ if (r) update_page_info(handle, cpu); free_next(handle, cpu); } return ret; } /** * tracecmd_read_cpu_last - get the last record in a CPU * @handle: input handle for the trace.dat file * @cpu: the CPU to search * * This returns the last (by time) record entry in a given CPU. * * The record returned must be freed. 
*/ struct tep_record * tracecmd_read_cpu_last(struct tracecmd_input *handle, int cpu) { struct tep_record *record = NULL; off_t offset, page_offset; offset = handle->cpu_data[cpu].file_offset + handle->cpu_data[cpu].file_size; if (offset & (handle->page_size - 1)) offset &= ~(handle->page_size - 1); else offset -= handle->page_size; page_offset = offset; again: if (get_page(handle, cpu, page_offset) < 0) return NULL; offset = page_offset; do { tracecmd_free_record(record); record = tracecmd_read_data(handle, cpu); if (record) offset = record->offset; } while (record); record = tracecmd_read_at(handle, offset, NULL); /* * It is possible that a page has just a timestamp * or just padding on it. */ if (!record) { if (page_offset == handle->cpu_data[cpu].file_offset) return NULL; page_offset -= handle->page_size; goto again; } return record; } /** * tracecmd_set_cpu_to_timestamp - set the CPU iterator to a given time * @handle: input handle for the trace.dat file * @cpu: the CPU pointer to set * @ts: the timestamp to set the CPU at. * * This sets the CPU iterator used by tracecmd_read_data and * tracecmd_peek_data to a location in the CPU storage near * a given timestamp. It will try to set the iterator to a time before * the time stamp and not actually at a given time. * * To use this to find a record in a time field, call this function * first, than iterate with tracecmd_read_data to find the records * you need. */ int tracecmd_set_cpu_to_timestamp(struct tracecmd_input *handle, int cpu, unsigned long long ts) { struct cpu_data *cpu_data = &handle->cpu_data[cpu]; off_t start, end, next; if (cpu < 0 || cpu >= handle->cpus) { errno = -EINVAL; return -1; } if (!cpu_data->size) return -1; if (!cpu_data->page) { if (init_cpu(handle, cpu)) return -1; } if (cpu_data->timestamp == ts) { /* * If a record is cached, then that record is most * likely the matching timestamp. 
Otherwise we need * to start from the beginning of the index; */ if (!cpu_data->next || cpu_data->next->ts != ts) update_page_info(handle, cpu); return 0; } /* Set to the first record on current page */ update_page_info(handle, cpu); if (cpu_data->timestamp < ts) { start = cpu_data->offset; end = cpu_data->file_offset + cpu_data->file_size; if (end & (handle->page_size - 1)) end &= ~(handle->page_size - 1); else end -= handle->page_size; next = end; } else { end = cpu_data->offset; start = cpu_data->file_offset; next = start; } while (start < end) { if (get_page(handle, cpu, next) < 0) return -1; if (cpu_data->timestamp == ts) break; if (cpu_data->timestamp < ts) start = next; else end = next; next = start + (end - start) / 2; next = calc_page_offset(handle, next); /* Prevent an infinite loop if start and end are a page off */ if (next == start) start = next += handle->page_size; } /* * We need to end up on a page before the time stamp. * We go back even if the timestamp is the same. This is because * we want the event with the timestamp, not the page. The page * can start with the timestamp we are looking for, but the event * may be on the previous page. */ if (cpu_data->timestamp >= ts && cpu_data->offset > cpu_data->file_offset) get_page(handle, cpu, cpu_data->offset - handle->page_size); return 0; } /** * tracecmd_set_all_cpus_to_timestamp - set all CPUs iterator to a given time * @handle: input handle for the trace.dat file * @cpu: the CPU pointer to set * @ts: the timestamp to set the CPU at. * * This sets the CPU iterator used by tracecmd_read_data and * tracecmd_peek_data to a location in the CPU storage near * a given timestamp. It will try to set the iterator to a time before * the time stamp and not actually at a given time. * * To use this to find a record in a time field, call this function * first, than iterate with tracecmd_read_next_data to find the records * you need. 
*/ void tracecmd_set_all_cpus_to_timestamp(struct tracecmd_input *handle, unsigned long long time) { int cpu; for (cpu = 0; cpu < handle->cpus; cpu++) tracecmd_set_cpu_to_timestamp(handle, cpu, time); } /** * tracecmd_set_cursor - set the offset for the next tracecmd_read_data * @handle: input handle for the trace.dat file * @cpu: the CPU pointer to set * @offset: the offset to place the cursor * * Set the pointer to the next read or peek. This is useful when * needing to read sequentially and then look at another record * out of sequence without breaking the iteration. This is done with: * * record = tracecmd_peek_data() * offset = record->offset; * record = tracecmd_read_at(); * - do what ever with record - * tracecmd_set_cursor(handle, cpu, offset); * * Now the next tracecmd_peek_data or tracecmd_read_data will return * the original record. */ int tracecmd_set_cursor(struct tracecmd_input *handle, int cpu, size_t offset) { struct cpu_data *cpu_data = &handle->cpu_data[cpu]; unsigned long long page_offset; if (cpu < 0 || cpu >= handle->cpus) return -1; if (offset < cpu_data->file_offset || offset > cpu_data->file_offset + cpu_data->file_size) return -1; /* cpu does not have this offset. */ /* Move this cpu index to point to this offest */ page_offset = calc_page_offset(handle, offset); if (get_page(handle, cpu, page_offset) < 0) return -1; peek_event(handle, offset, cpu); return 0; } /** * tracecmd_get_cursor - get the offset for the next tracecmd_read_data * @handle: input handle for the trace.dat file * @cpu: the CPU pointer to get the cursor from * * Returns the offset of the next record that would be read. */ unsigned long long tracecmd_get_cursor(struct tracecmd_input *handle, int cpu) { struct cpu_data *cpu_data = &handle->cpu_data[cpu]; struct kbuffer *kbuf = cpu_data->kbuf; if (cpu < 0 || cpu >= handle->cpus) return 0; /* * Use the next pointer if it exists and matches the * current timestamp. 
*/ if (cpu_data->next && cpu_data->next->ts == cpu_data->timestamp) return cpu_data->next->offset; /* * Either the next point does not exist, or it does * not match the timestamp. The next read will use the * current page. * * If the offset is at the end, then return that. */ if (cpu_data->offset >= cpu_data->file_offset + cpu_data->file_size) return cpu_data->offset; return cpu_data->offset + kbuffer_curr_offset(kbuf); } /** * tracecmd_translate_data - create a record from raw data * @handle: input handle for the trace.dat file * @ptr: raw data to read * @size: the size of the data * * This function tries to create a record from some given * raw data. The data does not need to be from the trace.dat file. * It can be stored from another location. * * Note, since the timestamp is calculated from within the trace * buffer, the timestamp for the record will be zero, since it * can't calculate it. * * The record returned must be freed. */ struct tep_record * tracecmd_translate_data(struct tracecmd_input *handle, void *ptr, int size) { struct tep_handle *pevent = handle->pevent; struct tep_record *record; unsigned int length; int swap = 1; /* minimum record read is 8, (warn?) (TODO: make 8 into macro) */ if (size < 8) return NULL; record = malloc(sizeof(*record)); if (!record) return NULL; memset(record, 0, sizeof(*record)); record->ref_count = 1; if (tep_is_local_bigendian(pevent) == tep_is_file_bigendian(pevent)) swap = 0; record->data = kbuffer_translate_data(swap, ptr, &length); record->size = length; if (record->data) record->record_size = record->size + (record->data - ptr); return record; } /** * tracecmd_peek_data - return the record at the current location. * @handle: input handle for the trace.dat file * @cpu: the CPU to pull from * * This returns the record at the current location of the CPU * iterator. It does not increment the CPU iterator. 
*/ struct tep_record * tracecmd_peek_data(struct tracecmd_input *handle, int cpu) { struct tep_record *record; unsigned long long ts; struct kbuffer *kbuf; struct page *page; int index; void *data; if (cpu >= handle->cpus) return NULL; page = handle->cpu_data[cpu].page; kbuf = handle->cpu_data[cpu].kbuf; /* Hack to work around function graph read ahead */ tracecmd_curr_thread_handle = handle; if (handle->cpu_data[cpu].next) { record = handle->cpu_data[cpu].next; if (!record->data) { tracecmd_critical("Something freed the record"); return NULL; } if (handle->cpu_data[cpu].timestamp == record->ts) return record; /* * The timestamp changed, which means the cached * record is no longer valid. Reread a new record. */ free_next(handle, cpu); } read_again: if (!page) { if (handle->use_pipe) { get_next_page(handle, cpu); page = handle->cpu_data[cpu].page; } if (!page) return NULL; } data = kbuffer_read_event(kbuf, &ts); if (!data) { if (get_next_page(handle, cpu)) return NULL; page = handle->cpu_data[cpu].page; goto read_again; } handle->cpu_data[cpu].timestamp = timestamp_calc(ts, cpu, handle); index = kbuffer_curr_offset(kbuf); record = malloc(sizeof(*record)); if (!record) return NULL; memset(record, 0, sizeof(*record)); record->ts = handle->cpu_data[cpu].timestamp; record->size = kbuffer_event_size(kbuf); record->cpu = handle->cpu_data[cpu].cpu; record->data = data; record->offset = handle->cpu_data[cpu].offset + index; record->missed_events = kbuffer_missed_events(kbuf); record->ref_count = 1; record->locked = 1; handle->cpu_data[cpu].next = record; record->record_size = kbuffer_curr_size(kbuf); record->priv = page; add_record(page, record); page->ref_count++; kbuffer_next_event(kbuf, NULL); return record; } /** * tracecmd_read_data - read the next record and increment * @handle: input handle for the trace.dat file * @cpu: the CPU to pull from * * This returns the record at the current location of the CPU * iterator and increments the CPU iterator. 
* * The record returned must be freed. */ struct tep_record * tracecmd_read_data(struct tracecmd_input *handle, int cpu) { struct tep_record *record; if (cpu >= handle->cpus) return NULL; record = tracecmd_peek_data(handle, cpu); handle->cpu_data[cpu].next = NULL; if (record) { record->locked = 0; #if DEBUG_RECORD record->alloc_addr = (unsigned long)__builtin_return_address(0); #endif } return record; } /** * tracecmd_read_next_data - read the next record * @handle: input handle to the trace.dat file * @rec_cpu: return pointer to the CPU that the record belongs to * * This returns the next record by time. This is different than * tracecmd_read_data in that it looks at all CPUs. It does a peek * at each CPU and the record with the earliest time stame is * returned. If @rec_cpu is not NULL it gets the CPU id the record was * on. The CPU cursor of the returned record is moved to the * next record. * * Multiple reads of this function will return a serialized list * of all records for all CPUs in order of time stamp. * * The record returned must be freed. */ struct tep_record * tracecmd_read_next_data(struct tracecmd_input *handle, int *rec_cpu) { struct tep_record *record; int next_cpu; record = tracecmd_peek_next_data(handle, &next_cpu); if (!record) return NULL; if (rec_cpu) *rec_cpu = next_cpu; return tracecmd_read_data(handle, next_cpu); } /** * tracecmd_follow_event - Add callback for specific events for iterators * @handle: The handle to get a callback from * @system: The system of the event to track * @event_name: The name of the event to track * @callback: The function to call when the event is hit in an iterator * @callback_data: The data to pass to @callback * * This attaches a callback to @handle where if tracecmd_iterate_events() * or tracecmd_iterate_events_multi() is called, that if the specified * event is hit, it will call @callback, with the following parameters: * @handle: Same handle as passed to this function. 
* @event: The event pointer that was found by @system and @event_name. * @record; The event instance of @event. * @cpu: The cpu that the event happened on. * @callback_data: The same as @callback_data passed to the function. * * Note that when used with tracecmd_iterate_events_multi() that @cpu * may be the nth CPU of all handles it is processing, so if the CPU * that the @record is on is desired, then use @record->cpu. * * Returns 0 on success and -1 on error. */ int tracecmd_follow_event(struct tracecmd_input *handle, const char *system, const char *event_name, int (*callback)(struct tracecmd_input *handle, struct tep_event *, struct tep_record *, int, void *), void *callback_data) { struct tep_handle *tep = tracecmd_get_tep(handle); struct follow_event *followers; struct follow_event follow; if (!tep) { errno = EINVAL; return -1; } follow.event = tep_find_event_by_name(tep, system, event_name); if (!follow.event) { errno = ENOENT; return -1; } follow.callback = callback; follow.callback_data = callback_data; followers = realloc(handle->followers, sizeof(*followers) * (handle->nr_followers + 1)); if (!followers) return -1; handle->followers = followers; followers[handle->nr_followers++] = follow; return 0; } /** * tracecmd_follow_missed_events - Add callback for missed events for iterators * @handle: The handle to get a callback from * @callback: The function to call when missed events is detected * @callback_data: The data to pass to @callback * * This attaches a callback to @handle where if tracecmd_iterate_events() * or tracecmd_iterate_events_multi() is called, that if missed events * is detected, it will call @callback, with the following parameters: * @handle: Same handle as passed to this function. * @event: The event pointer of the record with the missing events * @record; The event instance of @event. * @cpu: The cpu that the event happened on. * @callback_data: The same as @callback_data passed to the function. 
* * Note that when used with tracecmd_iterate_events_multi() that @cpu * may be the nth CPU of all handles it is processing, so if the CPU * that the @record is on is desired, then use @record->cpu. * * If the count of missing events is available, @record->missed_events * will have a positive number holding the number of missed events since * the last event on the same CPU, or just -1 if that number is unknown * but missed events did happen. * * Returns 0 on success and -1 on error. */ int tracecmd_follow_missed_events(struct tracecmd_input *handle, int (*callback)(struct tracecmd_input *handle, struct tep_event *, struct tep_record *, int, void *), void *callback_data) { struct follow_event *followers; struct follow_event follow; follow.event = NULL; follow.callback = callback; follow.callback_data = callback_data; followers = realloc(handle->missed_followers, sizeof(*followers) * (handle->nr_missed_followers + 1)); if (!followers) return -1; handle->missed_followers = followers; followers[handle->nr_missed_followers++] = follow; return 0; } static int call_followers(struct tracecmd_input *handle, struct tep_record *record, int cpu) { struct tep_handle *tep = tracecmd_get_tep(handle); struct follow_event *followers = handle->followers; struct tep_event *event; int ret = 0; int i; event = tep_find_event_by_record(tep, record); if (!event) return -1; for (i = 0; i < handle->nr_followers; i++) { if (handle->followers[i].event == event) ret |= followers[i].callback(handle, event, record, cpu, followers[i].callback_data); } return ret; } static int call_missed_events(struct tracecmd_input *handle, struct tep_record *record, int cpu) { struct tep_handle *tep = tracecmd_get_tep(handle); struct follow_event *followers = handle->missed_followers; struct tep_event *event; int ret = 0; int i; event = tep_find_event_by_record(tep, record); if (!event) return -1; for (i = 0; i < handle->nr_missed_followers; i++) { ret |= followers[i].callback(handle, event, record, cpu, 
followers[i].callback_data); } return ret; } static int call_callbacks(struct tracecmd_input *handle, struct tep_record *record, int next_cpu, int (*callback)(struct tracecmd_input *handle, struct tep_record *, int, void *), void *callback_data) { int ret = 0; if (!record) return 0; if (record->missed_events) ret = call_missed_events(handle, record, next_cpu); if (ret) return ret; if (!handle->filter || tracecmd_filter_match(handle->filter, record) == TRACECMD_FILTER_MATCH) { if (handle->nr_followers) ret = call_followers(handle, record, next_cpu); if (!ret && callback) ret = callback(handle, record, next_cpu, callback_data); } return ret; } /** * tracecmd_iterate_events - iterate events over a given handle * @handle: The handle to iterate over * @cpus: The CPU set to filter on (NULL for all CPUs) * @cpu_size: The size of @cpus (ignored if @cpus is NULL) * @callback: The callback function for each event * @callback_data: The data to pass to the @callback. * * Will loop over all events in @handle (filtered by the given @cpus), * and will call @callback for each event in order of the event's records * timestamp. * * Returns the -1 on error, or the value of the callbacks. 
*/ int tracecmd_iterate_events(struct tracecmd_input *handle, cpu_set_t *cpus, int cpu_size, int (*callback)(struct tracecmd_input *handle, struct tep_record *, int, void *), void *callback_data) { struct tep_record *record; unsigned long long *timestamps; unsigned long long ts, last_timestamp = 0; int *cpu_list; int cpu_count = 0; int next_cpu; int cpu; int ret = 0; int i; if (!callback && !handle->nr_followers) { errno = EINVAL; return -1; } timestamps = calloc(handle->cpus, sizeof(*timestamps)); if (!timestamps) return -1; cpu_list = calloc(handle->cpus, sizeof(*cpu_list)); if (!cpu_list) { free(timestamps); return -1; } for (cpu = 0; cpu < handle->cpus; cpu++) { if (cpus && !CPU_ISSET_S(cpu, cpu_size, cpus)) continue; cpu_list[cpu_count++] = cpu; } for (i = 0; i < cpu_count; i++) { cpu = cpu_list[i]; record = tracecmd_peek_data(handle, cpu); timestamps[cpu] = record ? record->ts : -1ULL; } do { next_cpu = -1; for (i = 0; i < cpu_count; i++) { cpu = cpu_list[i]; ts = timestamps[cpu]; if (ts == -1ULL) continue; if (next_cpu < 0 || ts < last_timestamp) { next_cpu = cpu; last_timestamp = ts; } } if (next_cpu >= 0) { record = tracecmd_peek_data(handle, next_cpu); /* Make sure the record is still what we expect it to be */ if (!record || record->ts != last_timestamp) { timestamps[next_cpu] = record ? record->ts : -1ULL; continue; } /* Need to call read_data to increment to the next record */ record = tracecmd_read_data(handle, next_cpu); ret = call_callbacks(handle, record, next_cpu, callback, callback_data); tracecmd_free_record(record); record = tracecmd_peek_data(handle, next_cpu); timestamps[next_cpu] = record ? 
record->ts : -1ULL; } } while (next_cpu >= 0 && ret == 0); free(timestamps); free(cpu_list); return ret; } static struct tep_record * load_records(struct tracecmd_input *handle, int cpu, unsigned long long page_offset, unsigned long long start_offset) { struct tep_record *last_record = NULL; struct tep_record *record; unsigned long long page_end = page_offset + handle->page_size; if (get_page(handle, cpu, page_offset) < 0) return NULL; update_page_info(handle, cpu); if (start_offset) page_end = start_offset + 1; for (;;) { record = tracecmd_read_data(handle, cpu); if (!record || record->offset >= page_end) { /* Make sure the cpu_data page is still valid */ get_page(handle, cpu, page_offset); tracecmd_free_record(record); break; } /* * Hijack the record->priv, as we know that it points * to handle->cpu_data[cpu].page, and use that as * a link list of all the records on this page going * backwards. */ record->priv = last_record; last_record = record; } return last_record; } static void initialize_last_events(struct tracecmd_input *handle, struct tep_record **last_records, cpu_set_t *cpu_set, int cpu_size, int cpus, bool cont) { unsigned long long page_offset; unsigned long long start_offset = 0; struct tep_record *record; int cpu; for (cpu = 0; cpu < cpus; cpu++) { if (cpu_set && !CPU_ISSET_S(cpu, cpu_size, cpu_set)) continue; if (!handle->cpu_data[cpu].file_size) continue; if (cont) { record = tracecmd_read_data(handle, cpu); if (record) page_offset = start_offset = record->offset; tracecmd_free_record(record); } if (!start_offset) { /* Find the start of the last page for this CPU */ page_offset = handle->cpu_data[cpu].file_offset + handle->cpu_data[cpu].file_size; } page_offset = calc_page_offset(handle, page_offset - 1); last_records[cpu] = load_records(handle, cpu, page_offset, start_offset); } } static struct tep_record *peek_last_event(struct tracecmd_input *handle, struct tep_record **last_records, int cpu) { struct tep_record *record = last_records[cpu]; 
struct page *page = handle->cpu_data[cpu].page; unsigned long long page_offset; if (record) return record; /* page can be NULL if the size is zero */ if (!page) return NULL; page_offset = page->offset - handle->page_size; if (page_offset < handle->cpu_data[cpu].file_offset) return NULL; last_records[cpu] = load_records(handle, cpu, page_offset, 0); return peek_last_event(handle, last_records, cpu); } static struct tep_record *next_last_event(struct tracecmd_input *handle, struct tep_record **last_records, int cpu) { struct tep_record *record = last_records[cpu]; struct page *page = handle->cpu_data[cpu].page; if (!record) return NULL; last_records[cpu] = record->priv; record->priv = page; return record; } /** * tracecmd_iterate_events_reverse - iterate events over a given handle backwards * @handle: The handle to iterate over * @cpus: The CPU set to filter on (NULL for all CPUs) * @cpu_size: The size of @cpus (ignored if @cpus is NULL) * @callback: The callback function for each event * @callback_data: The data to pass to the @callback. * @cont: If true, start where it left off, otherwise start at the end. * * Will loop over all events in @handle (filtered by the given @cpus), * and will call @callback for each event in reverse order. * * Returns the -1 on error, or the value of the callbacks. 
*/
int tracecmd_iterate_events_reverse(struct tracecmd_input *handle,
				    cpu_set_t *cpus, int cpu_size,
				    int (*callback)(struct tracecmd_input *handle,
						    struct tep_record *,
						    int, void *),
				    void *callback_data, bool cont)
{
	unsigned long long last_timestamp = 0;
	struct tep_record **records;
	struct tep_record *record;
	int next_cpu;
	int max_cpus = handle->cpus;
	int cpu;
	int ret = 0;

	if (!callback && !handle->nr_followers) {
		errno = EINVAL;
		return -1;
	}

	/* One chain of pending records per CPU, see load_records() */
	records = calloc(max_cpus, sizeof(*records));
	if (!records)
		return -1;

	initialize_last_events(handle, records, cpus, cpu_size, max_cpus, cont);

	do {
		/* Find the CPU that has the latest pending record */
		next_cpu = -1;
		for (cpu = 0; cpu < max_cpus; cpu++) {
			if (cpus && !CPU_ISSET_S(cpu, cpu_size, cpus))
				continue;
			record = peek_last_event(handle, records, cpu);
			if (!record)
				continue;

			if (next_cpu < 0 || record->ts > last_timestamp) {
				next_cpu = cpu;
				last_timestamp = record->ts;
			}
		}
		if (next_cpu >= 0) {
			record = next_last_event(handle, records, next_cpu);
			ret = call_callbacks(handle, record, next_cpu,
					     callback, callback_data);
			tracecmd_free_record(record);
		}
	} while (next_cpu >= 0 && ret == 0);

	/*
	 * If a callback stopped the iteration, records loaded for the
	 * current pages are still chained in records[]. Free them, or
	 * they (and the page references they hold) are leaked. The
	 * chains of CPUs that were not selected are empty.
	 */
	for (cpu = 0; cpu < max_cpus; cpu++) {
		while ((record = next_last_event(handle, records, cpu)))
			tracecmd_free_record(record);
	}

	free(records);

	return ret;
}

/*
 * Per CPU state of tracecmd_iterate_events_multi(): the timestamp of
 * the next record of the CPU (-1ULL if it has none) and the handle the
 * CPU belongs to.
 */
struct record_handle {
	unsigned long long		ts;
	struct tracecmd_input		*handle;
};

/**
 * tracecmd_iterate_events_multi - iterate events over multiple handles
 * @handles: An array of handles to iterate over
 * @nr_handles: The number of handles in the @handles array.
 * @callback: The callback function for each event
 * @callback_data: The data to pass to the @callback.
 *
 * Will loop over all CPUs for each handle in @handles and call the
 * @callback in the order of the timestamp for each event's record
 * for each handle.
 *
 * Returns the -1 on error, or the value of the callbacks.
*/
int tracecmd_iterate_events_multi(struct tracecmd_input **handles,
				  int nr_handles,
				  int (*callback)(struct tracecmd_input *handle,
						  struct tep_record *,
						  int, void *),
				  void *callback_data)
{
	struct tracecmd_input *handle;
	struct record_handle *slots;
	struct tep_record *record;
	unsigned long long ts, last_timestamp = 0;
	int total_cpus = 0;
	int nr_slots = 0;
	int next_cpu;
	int cpu;
	int ret = 0;
	int i;

	for (i = 0; i < nr_handles; i++)
		total_cpus += handles[i]->cpus;

	/* One slot per CPU of every handle */
	slots = calloc(total_cpus, sizeof(*slots));
	if (!slots)
		return -1;

	/* Each handle owns the slots start_cpu .. start_cpu + cpus - 1 */
	for (i = 0; i < nr_handles; i++) {
		handle = handles[i];
		handle->start_cpu = nr_slots;
		for (cpu = 0; cpu < handle->cpus; cpu++) {
			record = tracecmd_peek_data(handle, cpu);
			slots[nr_slots + cpu].ts = record ? record->ts : -1ULL;
			slots[nr_slots + cpu].handle = handle;
		}
		nr_slots += cpu;
	}

	for (;;) {
		/* Find the slot that has the earliest cached timestamp */
		next_cpu = -1;
		for (cpu = 0; cpu < nr_slots; cpu++) {
			ts = slots[cpu].ts;
			if (ts == -1ULL)
				continue;

			if (next_cpu < 0 || ts < last_timestamp) {
				next_cpu = cpu;
				last_timestamp = ts;
			}
		}
		if (next_cpu < 0)
			break;

		/* Translate the slot back to the handle and its own CPU */
		handle = slots[next_cpu].handle;
		cpu = next_cpu - handle->start_cpu;

		/* Refresh record as callback could have changed */
		record = tracecmd_peek_data(handle, cpu);

		/* If the record updated, try again */
		if (!record || record->ts != last_timestamp) {
			slots[next_cpu].ts = record ? record->ts : -1ULL;
			continue;
		}

		/* Need to call read_data to increment to the next record */
		record = tracecmd_read_data(handle, cpu);

		ret = call_callbacks(handle, record, next_cpu,
				     callback, callback_data);

		tracecmd_free_record(record);

		if (ret)
			break;
	}

	free(slots);

	return ret;
}

/**
 * tracecmd_peek_next_data - return the next record
 * @handle: input handle to the trace.dat file
 * @rec_cpu: return pointer to the CPU that the record belongs to
 *
 * This returns the next record by time. This is different than
 * tracecmd_peek_data in that it looks at all CPUs.
It does a peek * at each CPU and the record with the earliest time stame is * returned. If @rec_cpu is not NULL it gets the CPU id the record was * on. It does not increment the CPU iterator. */ struct tep_record * tracecmd_peek_next_data(struct tracecmd_input *handle, int *rec_cpu) { unsigned long long ts; struct tep_record *record, *next_record = NULL; int next_cpu; int cpu; if (rec_cpu) *rec_cpu = -1; next_cpu = -1; ts = 0; for (cpu = 0; cpu < handle->cpus; cpu++) { record = tracecmd_peek_data(handle, cpu); if (record && (!next_record || record->ts < ts)) { ts = record->ts; next_cpu = cpu; next_record = record; } } if (next_record) { if (rec_cpu) *rec_cpu = next_cpu; return next_record; } return NULL; } /** * tracecmd_read_prev - read the record before the given record * @handle: input handle to the trace.dat file * @record: the record to use to find the previous record. * * This returns the record before the @record on its CPU. If * @record is the first record, NULL is returned. The cursor is set * as if the previous record was read by tracecmd_read_data(). * * @record can not be NULL, otherwise NULL is returned; the * record ownership goes to this function. * * Note, this is not that fast of an algorithm, since it needs * to build the timestamp for the record. * * The record returned must be freed with tracecmd_free_record(). 
*
 * NOTE(review): the code below only reads the cpu and offset of the
 * @record passed in and never frees it, although the text above says
 * the ownership goes to this function - confirm against the callers.
 */
struct tep_record *
tracecmd_read_prev(struct tracecmd_input *handle, struct tep_record *record)
{
	unsigned long long offset, page_offset;;
	struct cpu_data *cpu_data;
	int index;
	int cpu;

	if (!record)
		return NULL;

	cpu = record->cpu;
	offset = record->offset;
	cpu_data = &handle->cpu_data[cpu];

	page_offset = calc_page_offset(handle, offset);
	/* NOTE(review): this value is overwritten before it is used */
	index = offset - page_offset;

	/* Note, the record passed in could have been a peek */
	free_next(handle, cpu);

	/* Reset the cursor */
	/* Should not happen */
	if (get_page(handle, cpu, page_offset) < 0)
		return NULL;

	update_page_info(handle, cpu);

	/*
	 * Find the record before this record: walk the page from its
	 * beginning, index remembers where the previously read record
	 * started (0 means that no record precedes it on this page).
	 */
	index = 0;
	for (;;) {
		record = tracecmd_read_data(handle, cpu);
		/* Should not happen! */
		if (!record)
			return NULL;
		if (record->offset == offset)
			break;
		index = record->offset - page_offset;
		tracecmd_free_record(record);
	}
	tracecmd_free_record(record);

	if (index)
		/* we found our record */
		return tracecmd_read_at(handle, page_offset + index, NULL);

	/* reset the index to start at the beginning of the page */
	update_page_info(handle, cpu);

	/* The previous record is on the previous page */
	for (;;) {
		/* check if this is the first page */
		if (page_offset == cpu_data->file_offset)
			return NULL;
		page_offset -= handle->page_size;

		/* Updating page to a new page will reset index to 0 */
		get_page(handle, cpu, page_offset);

		/*
		 * Read forward until the original record shows up again,
		 * index then holds the start of the record read before it.
		 */
		record = NULL;
		index = 0;
		do {
			if (record) {
				index = record->offset - page_offset;
				tracecmd_free_record(record);
			}
			record = tracecmd_read_data(handle, cpu);
			/* Should not happen */
			if (!record)
				return NULL;
		} while (record->offset != offset);
		tracecmd_free_record(record);

		if (index)
			/* we found our record */
			return tracecmd_read_at(handle, page_offset + index, NULL);
	}

	/* Not reached */
}

/*
 * Uncompress the whole data of @cpu into a temporary file created from
 * the COMPR_TEMP_FILE template, and make the cpu_data describe the
 * uncompressed data: its file offset becomes handle->next_offset, which
 * is then advanced by the uncompressed size rounded up to a page.
 *
 * The position of handle->fd is restored before returning.
 *
 * Returns 0 on success, -1 on error.
 * NOTE(review): on the error paths that follow mkstemp() the temporary
 * file and its descriptor are left in cpu_data->compress - presumably
 * they are released when the handle is closed; verify.
 */
static int init_cpu_zfile(struct tracecmd_input *handle, int cpu)
{
	struct cpu_data *cpu_data;
	off_t offset;
	size_t size;

	cpu_data = &handle->cpu_data[cpu];
	/* Remember where the file is, to be able to go back there */
	offset = lseek(handle->fd, 0, SEEK_CUR);
	if (lseek(handle->fd, cpu_data->file_offset, SEEK_SET) == (off_t)-1)
		return -1;

	strcpy(cpu_data->compress.file, COMPR_TEMP_FILE);
	cpu_data->compress.fd = mkstemp(cpu_data->compress.file);
	if (cpu_data->compress.fd < 0)
		return -1;

	if (tracecmd_uncompress_copy_to(handle->compress, cpu_data->compress.fd, NULL, &size))
		return -1;

	if (lseek(handle->fd, offset, SEEK_SET) == (off_t)-1)
		return -1;

	cpu_data->file_offset = handle->next_offset;
	/* The data of the next CPU starts on the next page boundary */
	handle->next_offset = (handle->next_offset + size + handle->page_size - 1) &
		~(handle->page_size - 1);
	cpu_data->offset = cpu_data->file_offset;

	cpu_data->file_size = size;
	cpu_data->size = size;
	return 0;
}

/*
 * Load the information of the compressed chunks of @cpu (the chunks
 * themselves are not uncompressed here) and make the cpu_data describe
 * the uncompressed data: its size is the sum of the chunk sizes and its
 * file offset becomes handle->next_offset, which is then advanced by
 * that size rounded up to a page.
 *
 * Returns 0 on success, -1 on error.
 */
static int init_cpu_zpage(struct tracecmd_input *handle, int cpu)
{
	struct cpu_data *cpu_data = &handle->cpu_data[cpu];
	int count;
	int i;

	if (lseek(handle->fd, cpu_data->file_offset, SEEK_SET) == (off_t)-1)
		return -1;

	count = tracecmd_load_chunks_info(handle->compress, &cpu_data->compress.chunks);
	if (count < 0)
		return -1;

	cpu_data->compress.count = count;
	cpu_data->compress.last_chunk = 0;

	cpu_data->file_offset = handle->next_offset;
	cpu_data->file_size = 0;

	for (i = 0; i < count; i++)
		cpu_data->file_size += cpu_data->compress.chunks[i].size;

	cpu_data->offset = cpu_data->file_offset;
	cpu_data->size = cpu_data->file_size;
	/* The data of the next CPU starts on the next page boundary */
	handle->next_offset = (handle->next_offset + cpu_data->file_size + handle->page_size - 1) &
		~(handle->page_size - 1);
	return 0;
}

/*
 * rbtree compare callback of the chunk cache: orders two cached chunks
 * with chunk_cmp().
 */
static int compress_cmp(const struct trace_rbtree_node *A,
			const struct trace_rbtree_node *B)
{
	const struct zchunk_cache *cacheA;
	const struct zchunk_cache *cacheB;

	cacheA = container_of(A, struct zchunk_cache, node);
	cacheB = container_of(B, struct zchunk_cache, node);

	return chunk_cmp(cacheA->chunk, cacheB->chunk);
}

/*
 * rbtree search callback of the chunk cache. @data points to the off_t
 * offset that is searched for.
 *
 * Returns 0 if CHUNK_CHECK_OFFSET() matches the offset against the chunk
 * of the node, -1 if the chunk starts before the offset and 1 otherwise.
 */
static int compress_search(const struct trace_rbtree_node *A,
			   const void *data)
{
	const struct zchunk_cache *cache;
	off_t offset = *(off_t *)data;

	cache = container_of(A, struct zchunk_cache, node);

	if (CHUNK_CHECK_OFFSET(cache->chunk, offset))
		return 0;

	if (cache->chunk->offset < offset)
		return -1;

	return 1;
}

static int
init_cpu(struct tracecmd_input *handle, int cpu) { struct cpu_data *cpu_data = &handle->cpu_data[cpu]; int ret; int i; if (handle->cpu_compressed && cpu_data->file_size > 0) { if (handle->read_zpage) ret = init_cpu_zpage(handle, cpu); else ret = init_cpu_zfile(handle, cpu); if (ret) return ret; } else { cpu_data->offset = cpu_data->file_offset; cpu_data->size = cpu_data->file_size; } cpu_data->timestamp = 0; list_head_init(&cpu_data->page_maps); trace_rbtree_init(&cpu_data->compress.cache, compress_cmp, compress_search); if (!cpu_data->size) { tracecmd_info("CPU %d is empty", cpu); return 0; } cpu_data->nr_pages = (cpu_data->size + handle->page_size - 1) / handle->page_size; if (!cpu_data->nr_pages) cpu_data->nr_pages = 1; cpu_data->pages = calloc(cpu_data->nr_pages, sizeof(*cpu_data->pages)); if (!cpu_data->pages) return -1; if (handle->use_pipe) { /* Just make a page, it will be nuked later */ cpu_data->page = malloc(sizeof(*cpu_data->page)); if (!cpu_data->page) goto fail; memset(cpu_data->page, 0, sizeof(*cpu_data->page)); cpu_data->pages[0] = cpu_data->page; cpu_data->page_cnt = 1; cpu_data->page->ref_count = 1; return 0; } cpu_data->page = allocate_page(handle, cpu, cpu_data->offset); if (!cpu_data->page && !handle->read_page) { perror("mmap"); fprintf(stderr, "Can not mmap file, will read instead\n"); if (cpu) { /* * If the other CPUs had size and was able to mmap * then bail. */ for (i = 0; i < cpu; i++) { if (handle->cpu_data[i].size) goto fail; } } /* try again without mmapping, just read it directly */ handle->read_page = true; cpu_data->page = allocate_page(handle, cpu, cpu_data->offset); if (!cpu_data->page) /* Still no luck, bail! 
*/ goto fail; } if (update_page_info(handle, cpu)) goto fail; cpu_data->first_ts = cpu_data->timestamp; return 0; fail: free(cpu_data->pages); cpu_data->pages = NULL; free(cpu_data->page); cpu_data->page = NULL; return -1; } void tracecmd_set_ts_offset(struct tracecmd_input *handle, long long offset) { handle->ts_offset = offset; } /** * tracecmd_add_ts_offset - Add value to the offset which will be applied to the timestamps of all * events from given trace file * @handle: input handle to the trace.dat file * @offset: value, that will be added to the offset */ void tracecmd_add_ts_offset(struct tracecmd_input *handle, long long offset) { handle->ts_offset += offset; } void tracecmd_set_ts2secs(struct tracecmd_input *handle, unsigned long long hz) { double ts2secs; ts2secs = (double)NSEC_PER_SEC / (double)hz; handle->ts2secs = ts2secs; handle->use_trace_clock = false; } static int tsync_offset_cmp(const void *a, const void *b) { struct ts_offset_sample *ts_a = (struct ts_offset_sample *)a; struct ts_offset_sample *ts_b = (struct ts_offset_sample *)b; if (ts_a->time > ts_b->time) return 1; if (ts_a->time < ts_b->time) return -1; return 0; } #define safe_read(R, C) \ do { \ if ((C) > size) \ return -EFAULT; \ (R) = tep_read_number(tep, buf, (C)); \ buf += (C); \ size -= (C); \ } while (0) #define safe_read_loop(type) \ do { \ int ii; \ for (ii = 0; ii < ts_offsets->ts_samples_count; ii++) \ safe_read(ts_offsets->ts_samples[ii].type, 8); \ } while (0) static int tsync_cpu_offsets_load(struct tracecmd_input *handle, char *buf, int size) { struct tep_handle *tep = handle->pevent; struct timesync_offsets *ts_offsets; int i, j, k; safe_read(handle->host.cpu_count, 4); handle->host.ts_offsets = calloc(handle->host.cpu_count, sizeof(struct timesync_offsets)); if (!handle->host.ts_offsets) return -ENOMEM; for (i = 0; i < handle->host.cpu_count; i++) { ts_offsets = &handle->host.ts_offsets[i]; safe_read(ts_offsets->ts_samples_count, 4); ts_offsets->ts_samples = 
calloc(ts_offsets->ts_samples_count, sizeof(struct ts_offset_sample)); if (!ts_offsets->ts_samples) return -ENOMEM; safe_read_loop(time); safe_read_loop(offset); safe_read_loop(scaling); } if (size > 0) { for (i = 0; i < handle->host.cpu_count; i++) { ts_offsets = &handle->host.ts_offsets[i]; safe_read_loop(fraction); } } for (i = 0; i < handle->host.cpu_count; i++) { ts_offsets = &handle->host.ts_offsets[i]; qsort(ts_offsets->ts_samples, ts_offsets->ts_samples_count, sizeof(struct ts_offset_sample), tsync_offset_cmp); /* Filter possible samples with equal time */ for (k = 0, j = 0; k < ts_offsets->ts_samples_count; k++) { if (k == 0 || ts_offsets->ts_samples[k].time != ts_offsets->ts_samples[k-1].time) ts_offsets->ts_samples[j++] = ts_offsets->ts_samples[k]; } ts_offsets->ts_samples_count = j; } return 0; } static void trace_tsync_offset_free(struct host_trace_info *host) { int i; if (host->ts_offsets) { for (i = 0; i < host->cpu_count; i++) free(host->ts_offsets[i].ts_samples); free(host->ts_offsets); host->ts_offsets = NULL; } } static int trace_pid_map_cmp(const void *a, const void *b) { struct tracecmd_proc_addr_map *m_a = (struct tracecmd_proc_addr_map *)a; struct tracecmd_proc_addr_map *m_b = (struct tracecmd_proc_addr_map *)b; if (m_a->start > m_b->start) if (m_a->start < m_b->start) return -1; return 0; } static void procmap_free(struct pid_addr_maps *maps) { int i; if (!maps) return; if (maps->lib_maps) { for (i = 0; i < maps->nr_lib_maps; i++) free(maps->lib_maps[i].lib_name); free(maps->lib_maps); } free(maps->proc_name); free(maps); } static void trace_guests_free(struct tracecmd_input *handle) { struct guest_trace_info *guest; while (handle->guest) { guest = handle->guest; handle->guest = handle->guest->next; free(guest->name); free(guest->cpu_pid); free(guest); } } static int trace_guest_load(struct tracecmd_input *handle, char *buf, int size) { struct guest_trace_info *guest = NULL; int cpu; int i; guest = calloc(1, sizeof(struct guest_trace_info)); 
if (!guest) goto error; /* * Guest name, null terminated string * long long (8 bytes) trace-id * int (4 bytes) number of guest CPUs * array of size number of guest CPUs: * int (4 bytes) Guest CPU id * int (4 bytes) Host PID, running the guest CPU */ guest->name = strndup(buf, size); if (!guest->name) goto error; buf += strlen(guest->name) + 1; size -= strlen(guest->name) + 1; if (size < sizeof(long long)) goto error; guest->trace_id = tep_read_number(handle->pevent, buf, sizeof(long long)); buf += sizeof(long long); size -= sizeof(long long); if (size < sizeof(int)) goto error; guest->vcpu_count = tep_read_number(handle->pevent, buf, sizeof(int)); buf += sizeof(int); size -= sizeof(int); guest->cpu_pid = calloc(guest->vcpu_count, sizeof(int)); if (!guest->cpu_pid) goto error; for (i = 0; i < guest->vcpu_count; i++) { if (size < 2 * sizeof(int)) goto error; cpu = tep_read_number(handle->pevent, buf, sizeof(int)); buf += sizeof(int); if (cpu >= guest->vcpu_count) goto error; guest->cpu_pid[cpu] = tep_read_number(handle->pevent, buf, sizeof(int)); buf += sizeof(int); size -= 2 * sizeof(int); } guest->next = handle->guest; handle->guest = guest; return 0; error: if (guest) { free(guest->cpu_pid); free(guest->name); free(guest); } return -1; } /* Needs to be a constant, and 4K should be good enough */ #define STR_PROCMAP_LINE_MAX 4096 static int trace_pid_map_load(struct tracecmd_input *handle, char *buf) { struct pid_addr_maps *maps = NULL; char mapname[STR_PROCMAP_LINE_MAX+1]; char *line; int res; int ret; int i; maps = calloc(1, sizeof(*maps)); if (!maps) return -ENOMEM; ret = -EINVAL; line = strchr(buf, '\n'); if (!line) goto out_fail; *line = '\0'; if (strlen(buf) > STR_PROCMAP_LINE_MAX) goto out_fail; res = sscanf(buf, "%x %x %"STRINGIFY(STR_PROCMAP_LINE_MAX)"s", &maps->pid, &maps->nr_lib_maps, mapname); if (res != 3) goto out_fail; ret = -ENOMEM; maps->proc_name = strdup(mapname); if (!maps->proc_name) goto out_fail; maps->lib_maps = calloc(maps->nr_lib_maps, 
sizeof(struct tracecmd_proc_addr_map)); if (!maps->lib_maps) goto out_fail; buf = line + 1; line = strchr(buf, '\n'); for (i = 0; i < maps->nr_lib_maps; i++) { if (!line) break; *line = '\0'; if (strlen(buf) > STR_PROCMAP_LINE_MAX) break; res = sscanf(buf, "%zx %zx %s", &maps->lib_maps[i].start, &maps->lib_maps[i].end, mapname); if (res != 3) break; maps->lib_maps[i].lib_name = strdup(mapname); if (!maps->lib_maps[i].lib_name) goto out_fail; buf = line + 1; line = strchr(buf, '\n'); } ret = -EINVAL; if (i != maps->nr_lib_maps) goto out_fail; qsort(maps->lib_maps, maps->nr_lib_maps, sizeof(*maps->lib_maps), trace_pid_map_cmp); maps->next = handle->pid_maps; handle->pid_maps = maps; return 0; out_fail: procmap_free(maps); return ret; } static void trace_pid_map_free(struct pid_addr_maps *maps) { struct pid_addr_maps *del; while (maps) { del = maps; maps = maps->next; procmap_free(del); } } static int trace_pid_map_search(const void *a, const void *b) { struct tracecmd_proc_addr_map *key = (struct tracecmd_proc_addr_map *)a; struct tracecmd_proc_addr_map *map = (struct tracecmd_proc_addr_map *)b; if (key->start >= map->end) return 1; if (key->start < map->start) return -1; return 0; } /** * tracecmd_search_task_map - Search task memory address map * @handle: input handle to the trace.dat file * @pid: pid of the task * @addr: address from the task memory space. * * Map of the task memory can be saved in the trace.dat file, using the option * "--proc-map". If there is such information, this API can be used to look up * into this memory map to find what library is loaded at the given @addr. * * A pointer to struct tracecmd_proc_addr_map is returned, containing the name * of the library at given task @addr and the library start and end addresses. 
*/ struct tracecmd_proc_addr_map * tracecmd_search_task_map(struct tracecmd_input *handle, int pid, unsigned long long addr) { struct tracecmd_proc_addr_map *lib; struct tracecmd_proc_addr_map key; struct pid_addr_maps *maps; if (!handle || !handle->pid_maps) return NULL; maps = handle->pid_maps; while (maps) { if (maps->pid == pid) break; maps = maps->next; } if (!maps || !maps->nr_lib_maps || !maps->lib_maps) return NULL; key.start = addr; lib = bsearch(&key, maps->lib_maps, maps->nr_lib_maps, sizeof(*maps->lib_maps), trace_pid_map_search); return lib; } __hidden unsigned int get_meta_strings_size(struct tracecmd_input *handle) { return handle->strings_size; } __hidden unsigned long long get_last_option_offset(struct tracecmd_input *handle) { return handle->options_last_offset; } static int handle_option_done(struct tracecmd_input *handle, char *buf, int size) { unsigned long long offset; if (size < 8) return -1; offset = lseek(handle->fd, 0, SEEK_CUR); if (offset >= size) handle->options_last_offset = offset - size; offset = tep_read_number(handle->pevent, buf, 8); if (!offset) return 0; if (lseek(handle->fd, offset, SEEK_SET) == (off_t)-1) return -1; return handle_options(handle); } static inline int save_read_number(struct tep_handle *tep, char *data, int *data_size, int *read_pos, int bytes, unsigned long long *num) { if (bytes > *data_size) return -1; *num = tep_read_number(tep, (data + *read_pos), bytes); *read_pos += bytes; *data_size -= bytes; return 0; } static inline char *save_read_string(char *data, int *data_size, int *read_pos) { char *str; if (*data_size < 1) return NULL; str = strdup(data + *read_pos); if (!str) return NULL; *data_size -= (strlen(str) + 1); if (*data_size < 0) { free(str); return NULL; } *read_pos += (strlen(str) + 1); return str; } static int handle_buffer_option(struct tracecmd_input *handle, unsigned short id, char *data, int size) { struct input_buffer_instance *buff; struct cpu_file_data *cpu_data; unsigned long long tmp; 
long long max_cpu = -1; int rsize = 0; char *name; int i; if (save_read_number(handle->pevent, data, &size, &rsize, 8, &tmp)) return -1; name = save_read_string(data, &size, &rsize); if (!name) return -1; if (*name == '\0') { /* top buffer */ buff = &handle->top_buffer; } else { buff = realloc(handle->buffers, sizeof(*handle->buffers) * (handle->nr_buffers + 1)); if (!buff) { free(name); return -1; } handle->buffers = buff; handle->nr_buffers++; buff = &handle->buffers[handle->nr_buffers - 1]; } memset(buff, 0, sizeof(struct input_buffer_instance)); buff->name = name; buff->offset = tmp; if (!HAS_SECTIONS(handle)) return 0; /* file sections specific data */ buff->clock = save_read_string(data, &size, &rsize); if (!buff->clock) return -1; if (*name == '\0' && !handle->trace_clock) handle->trace_clock = strdup(buff->clock); if (id == TRACECMD_OPTION_BUFFER) { if (save_read_number(handle->pevent, data, &size, &rsize, 4, &tmp)) return -1; buff->page_size = tmp; if (save_read_number(handle->pevent, data, &size, &rsize, 4, &tmp)) return -1; buff->cpus = tmp; if (!buff->cpus) return 0; cpu_data = calloc(buff->cpus, sizeof(*cpu_data)); if (!cpu_data) return -1; for (i = 0; i < buff->cpus; i++) { if (save_read_number(handle->pevent, data, &size, &rsize, 4, &tmp)) goto fail; if ((long long)tmp > max_cpu) max_cpu = tmp; cpu_data[i].cpu = tmp; if (save_read_number(handle->pevent, data, &size, &rsize, 8, &cpu_data[i].offset)) goto fail; if (save_read_number(handle->pevent, data, &size, &rsize, 8, &cpu_data[i].size)) goto fail; } if (buff->cpus == max_cpu + 1) { /* Check to make sure cpus match the index */ for (i = 0; i < buff->cpus; i++) { if (cpu_data[i].cpu != i) goto copy_buffer; } buff->cpu_data = cpu_data; } else { copy_buffer: buff->cpu_data = calloc(max_cpu + 1, sizeof(*cpu_data)); if (!buff->cpu_data) goto fail; for (i = 0; i < buff->cpus; i++) { if (buff->cpu_data[cpu_data[i].cpu].size) { tracecmd_warning("More than one buffer defined for CPU %d (buffer %d)\n", 
cpu_data[i].cpu, i); goto fail; } buff->cpu_data[cpu_data[i].cpu] = cpu_data[i]; } buff->cpus = max_cpu + 1; free(cpu_data); } } else { buff->latency = true; } return 0; fail: free(cpu_data); return -1; } static int handle_options(struct tracecmd_input *handle) { long long offset; unsigned short option; unsigned int size; unsigned short id, flags; char *cpustats = NULL; struct hook_list *hook; bool compress = false; char *buf; int cpus; int ret; if (!HAS_SECTIONS(handle)) { handle->options_start = lseek(handle->fd, 0, SEEK_CUR); } else { if (read_section_header(handle, &id, &flags, NULL, NULL)) return -1; if (id != TRACECMD_OPTION_DONE) return -1; if (flags & TRACECMD_SEC_FL_COMPRESS) compress = true; } if (compress && in_uncompress_block(handle)) return -1; for (;;) { ret = read2(handle, &option); if (ret) goto out; if (!HAS_SECTIONS(handle) && option == TRACECMD_OPTION_DONE) break; /* next 4 bytes is the size of the option */ ret = read4(handle, &size); if (ret) goto out; buf = malloc(size); if (!buf) { ret = -ENOMEM; goto out; } ret = do_read_check(handle, buf, size); if (ret) goto out; switch (option) { case TRACECMD_OPTION_DATE: /* * A time has been mapped that is the * difference between the timestamps and * gtod. It is stored as ASCII with '0x' * appended. */ if (handle->flags & (TRACECMD_FL_IGNORE_DATE | TRACECMD_FL_RAW_TS)) break; offset = strtoll(buf, NULL, 0); /* Convert from micro to nano */ offset *= 1000; handle->ts_offset += offset; break; case TRACECMD_OPTION_OFFSET: /* * Similar to date option, but just adds an * offset to the timestamp. */ if (handle->flags & TRACECMD_FL_RAW_TS) break; offset = strtoll(buf, NULL, 0); handle->ts_offset += offset; break; case TRACECMD_OPTION_TIME_SHIFT: /* * long long int (8 bytes) trace session ID * int (4 bytes) protocol flags. * int (4 bytes) CPU count. * array of size [CPU count]: * [ * int (4 bytes) count of timestamp offsets. * long long array of size [count] of times, * when the offsets were calculated. 
* long long array of size [count] of timestamp offsets. * long long array of size [count] of timestamp scaling ratios.* * ] * array of size [CPU count]: * [ * long long array of size [count] of timestamp scaling fraction bits.* * ]* */ if (size < 16 || (handle->flags & TRACECMD_FL_RAW_TS)) break; handle->host.peer_trace_id = tep_read_number(handle->pevent, buf, 8); handle->host.flags = tep_read_number(handle->pevent, buf + 8, 4); ret = tsync_cpu_offsets_load(handle, buf + 12, size - 12); if (ret < 0) goto out; tracecmd_enable_tsync(handle, true); break; case TRACECMD_OPTION_CPUSTAT: buf[size-1] = '\n'; cpustats = realloc(handle->cpustats, handle->cpustats_size + size + 1); if (!cpustats) { ret = -ENOMEM; goto out; } memcpy(cpustats + handle->cpustats_size, buf, size); handle->cpustats_size += size; cpustats[handle->cpustats_size] = 0; handle->cpustats = cpustats; break; case TRACECMD_OPTION_BUFFER: case TRACECMD_OPTION_BUFFER_TEXT: ret = handle_buffer_option(handle, option, buf, size); if (ret < 0) goto out; break; case TRACECMD_OPTION_TRACECLOCK: tracecmd_parse_trace_clock(handle, buf, size); if (!handle->ts2secs) handle->use_trace_clock = true; break; case TRACECMD_OPTION_UNAME: handle->uname = strdup(buf); break; case TRACECMD_OPTION_VERSION: handle->version = strdup(buf); break; case TRACECMD_OPTION_HOOK: hook = tracecmd_create_event_hook(buf); hook->next = handle->hooks; handle->hooks = hook; break; case TRACECMD_OPTION_CPUCOUNT: cpus = *(int *)buf; handle->cpus = tep_read_number(handle->pevent, &cpus, 4); if (handle->cpus > handle->max_cpu) handle->max_cpu = handle->cpus; tep_set_cpus(handle->pevent, handle->cpus); break; case TRACECMD_OPTION_PROCMAPS: if (buf[size-1] == '\0') trace_pid_map_load(handle, buf); break; case TRACECMD_OPTION_TRACEID: if (size < 8) break; handle->trace_id = tep_read_number(handle->pevent, buf, 8); break; case TRACECMD_OPTION_GUEST: trace_guest_load(handle, buf, size); break; case TRACECMD_OPTION_TSC2NSEC: if (size < 16 || 
(handle->flags & TRACECMD_FL_RAW_TS)) break; handle->tsc_calc.mult = tep_read_number(handle->pevent, buf, 4); handle->tsc_calc.shift = tep_read_number(handle->pevent, buf + 4, 4); handle->tsc_calc.offset = tep_read_number(handle->pevent, buf + 8, 8); if (!(handle->flags & TRACECMD_FL_RAW_TS)) handle->flags |= TRACECMD_FL_IN_USECS; break; case TRACECMD_OPTION_HEADER_INFO: case TRACECMD_OPTION_FTRACE_EVENTS: case TRACECMD_OPTION_EVENT_FORMATS: case TRACECMD_OPTION_KALLSYMS: case TRACECMD_OPTION_PRINTK: case TRACECMD_OPTION_CMDLINES: if (size < 8) break; section_add_or_update(handle, option, -1, tep_read_number(handle->pevent, buf, 8), 0); break; case TRACECMD_OPTION_DONE: if (compress) in_uncompress_reset(handle); ret = handle_option_done(handle, buf, size); free(buf); return ret; default: tracecmd_warning("unknown option %d", option); break; } free(buf); } ret = 0; out: if (compress) in_uncompress_reset(handle); return ret; } static int read_options_type(struct tracecmd_input *handle) { char buf[10]; if (CHECK_READ_STATE(handle, TRACECMD_FILE_CPU_LATENCY)) return 0; if (do_read_check(handle, buf, 10)) return -1; /* check if this handles options */ if (strncmp(buf, "options", 7) == 0) { if (handle_options(handle) < 0) return -1; handle->file_state = TRACECMD_FILE_OPTIONS; if (do_read_check(handle, buf, 10)) return -1; } /* * Check if this is a latency report or flyrecord. 
*/ if (strncmp(buf, "latency", 7) == 0) handle->file_state = TRACECMD_FILE_CPU_LATENCY; else if (strncmp(buf, "flyrecord", 9) == 0) handle->file_state = TRACECMD_FILE_CPU_FLYRECORD; else return -1; return 0; } int tracecmd_latency_data_read(struct tracecmd_input *handle, char **buf, size_t *size) { struct cpu_zdata *zdata = &handle->latz; void *data; int rsize; int fd = -1; int id; if (!handle || !buf || !size) return -1; if (handle->file_state != TRACECMD_FILE_CPU_LATENCY) return -1; if (!handle->cpu_compressed) { fd = handle->fd; } else if (!handle->read_zpage) { if (zdata->fd < 0) return -1; fd = zdata->fd; } /* Read data from a file */ if (fd >= 0) { if (!(*buf)) { *size = BUFSIZ; *buf = malloc(*size); if (!(*buf)) return -1; } return do_read_fd(fd, *buf, *size); } /* Uncompress data in memory */ if (zdata->last_chunk >= zdata->count) return 0; id = zdata->last_chunk; if (!*buf || *size < zdata->chunks[id].size) { data = realloc(*buf, zdata->chunks[id].size); if (!data) return -1; *buf = data; *size = zdata->chunks[id].size; } if (tracecmd_uncompress_chunk(handle->compress, &zdata->chunks[id], *buf)) return -1; rsize = zdata->chunks[id].size; zdata->last_chunk++; return rsize; } static int init_cpu_data(struct tracecmd_input *handle) { enum kbuffer_long_size long_size; enum kbuffer_endian endian; unsigned long long max_size = 0; unsigned long long pages; int cpu; /* We expect this to be flyrecord */ if (handle->file_state != TRACECMD_FILE_CPU_FLYRECORD) return -1; if (force_read) handle->read_page = true; if (handle->long_size == 8) long_size = KBUFFER_LSIZE_8; else long_size = KBUFFER_LSIZE_4; if (tep_is_file_bigendian(handle->pevent)) endian = KBUFFER_ENDIAN_BIG; else endian = KBUFFER_ENDIAN_LITTLE; for (cpu = 0; cpu < handle->cpus; cpu++) { handle->cpu_data[cpu].compress.fd = -1; handle->cpu_data[cpu].kbuf = kbuffer_alloc(long_size, endian); if (!handle->cpu_data[cpu].kbuf) goto out_free; if (tep_is_old_format(handle->pevent)) 
kbuffer_set_old_format(handle->cpu_data[cpu].kbuf); if (handle->cpu_data[cpu].file_size > max_size) max_size = handle->cpu_data[cpu].file_size; } /* Calculate about a meg of pages for buffering */ pages = handle->page_size ? max_size / handle->page_size : 0; if (!pages) pages = 1; pages = normalize_size(pages); handle->page_map_size = handle->page_size * pages; if (handle->page_map_size < handle->page_size) handle->page_map_size = handle->page_size; for (cpu = 0; cpu < handle->cpus; cpu++) { if (init_cpu(handle, cpu)) goto out_free; } return 0; out_free: for ( ; cpu >= 0; cpu--) { free_page(handle, cpu); kbuffer_free(handle->cpu_data[cpu].kbuf); handle->cpu_data[cpu].kbuf = NULL; } return -1; } int init_latency_data(struct tracecmd_input *handle) { size_t wsize; int ret; if (!handle->cpu_compressed) return 0; if (handle->read_zpage) { handle->latz.count = tracecmd_load_chunks_info(handle->compress, &handle->latz.chunks); if (handle->latz.count < 0) return -1; } else { strcpy(handle->latz.file, COMPR_TEMP_FILE); handle->latz.fd = mkstemp(handle->latz.file); if (handle->latz.fd < 0) return -1; ret = tracecmd_uncompress_copy_to(handle->compress, handle->latz.fd, NULL, &wsize); if (ret) return -1; lseek(handle->latz.fd, 0, SEEK_SET); } return 0; } static int init_buffer_cpu_data(struct tracecmd_input *handle, struct input_buffer_instance *buffer) { unsigned long long offset; unsigned long long size; unsigned short id, flags; int cpu; if (handle->cpu_data) return -1; if (lseek(handle->fd, buffer->offset, SEEK_SET) == (off_t)-1) return -1; if (read_section_header(handle, &id, &flags, NULL, NULL)) return -1; if (flags & TRACECMD_SEC_FL_COMPRESS) handle->cpu_compressed = true; if (buffer->latency) { handle->file_state = TRACECMD_FILE_CPU_LATENCY; return init_latency_data(handle) == 0 ? 
1 : -1; } handle->file_state = TRACECMD_FILE_CPU_FLYRECORD; handle->cpus = buffer->cpus; if (handle->max_cpu < handle->cpus) handle->max_cpu = handle->cpus; handle->cpu_data = calloc(handle->cpus, sizeof(*handle->cpu_data)); if (!handle->cpu_data) return -1; for (cpu = 0; cpu < handle->cpus; cpu++) { handle->cpu_data[cpu].cpu = buffer->cpu_data[cpu].cpu; offset = buffer->cpu_data[cpu].offset; size = buffer->cpu_data[cpu].size; handle->cpu_data[cpu].file_offset = offset; handle->cpu_data[cpu].file_size = size; if (size && (offset + size > handle->total_file_size)) { /* this happens if the file got truncated */ printf("File possibly truncated. " "Need at least %llu, but file size is %zu.\n", offset + size, handle->total_file_size); errno = EINVAL; return -1; } } return init_cpu_data(handle); } static int read_cpu_data(struct tracecmd_input *handle) { unsigned long long size; int cpus; int cpu; /* * Check if this is a latency report or not. */ if (handle->file_state == TRACECMD_FILE_CPU_LATENCY) return 1; /* We expect this to be flyrecord */ if (handle->file_state != TRACECMD_FILE_CPU_FLYRECORD) return -1; cpus = handle->cpus; handle->cpu_data = malloc(sizeof(*handle->cpu_data) * handle->cpus); if (!handle->cpu_data) return -1; memset(handle->cpu_data, 0, sizeof(*handle->cpu_data) * handle->cpus); for (cpu = 0; cpu < handle->cpus; cpu++) { unsigned long long offset; handle->cpu_data[cpu].cpu = cpu; read8(handle, &offset); read8(handle, &size); handle->cpu_data[cpu].file_offset = offset; handle->cpu_data[cpu].file_size = size; if (size && (offset + size > handle->total_file_size)) { /* this happens if the file got truncated */ printf("File possibly truncated. " "Need at least %llu, but file size is %zu.\n", offset + size, handle->total_file_size); errno = EINVAL; return -1; } } /* * It is possible that an option changed the number of CPUs. * If that happened, then there's "empty" cpu data saved for * backward compatibility. 
*/ if (cpus < handle->cpus) { unsigned long long ignore; int once = 0; read8(handle, &ignore); /* offset */ read8(handle, &ignore); /* size */ if (ignore != 0) { if (!once) { tracecmd_warning("ignored CPU data not zero size"); once++; } } } return init_cpu_data(handle); } static int read_data_and_size(struct tracecmd_input *handle, char **data, unsigned long long *size) { if (read8(handle, size) < 0) return -1; *data = malloc(*size + 1); if (!*data) return -1; if (do_read_check(handle, *data, *size)) { free(*data); return -1; } return 0; } static int read_and_parse_cmdlines(struct tracecmd_input *handle) { struct tep_handle *pevent = handle->pevent; unsigned long long size; char *cmdlines; if (CHECK_READ_STATE(handle, TRACECMD_FILE_CMD_LINES)) return 0; if (!HAS_SECTIONS(handle)) section_add_or_update(handle, TRACECMD_OPTION_CMDLINES, 0, 0, lseek(handle->fd, 0, SEEK_CUR)); if (read_data_and_size(handle, &cmdlines, &size) < 0) return -1; cmdlines[size] = 0; tep_parse_saved_cmdlines(pevent, cmdlines); free(cmdlines); handle->file_state = TRACECMD_FILE_CMD_LINES; return 0; } static void extract_trace_clock(struct tracecmd_input *handle, char *line) { char *clock = NULL; char *next = NULL; char *data; data = strtok_r(line, "[]", &next); sscanf(data, "%ms", &clock); /* TODO: report if it fails to allocate */ handle->trace_clock = clock; if (!clock) return; /* Clear usecs if raw timestamps are requested */ if (handle->flags & TRACECMD_FL_RAW_TS) handle->flags &= ~TRACECMD_FL_IN_USECS; /* tsc_calc is a conversion to nanoseconds */ if (handle->tsc_calc.mult) return; /* Clear usecs if not one of the specified clocks */ if (strcmp(clock, "local") && strcmp(clock, "global") && strcmp(clock, "uptime") && strcmp(clock, "perf") && strncmp(clock, "mono", 4) && strcmp(clock, TSCNSEC_CLOCK) && strcmp(clock, "tai")) handle->flags &= ~TRACECMD_FL_IN_USECS; } void tracecmd_parse_trace_clock(struct tracecmd_input *handle, char *file, int size __maybe_unused) { char *line; char *next = 
NULL; line = strtok_r(file, " ", &next); while (line) { /* current trace_clock is shown as "[local]". */ if (*line == '[') return extract_trace_clock(handle, line); line = strtok_r(NULL, " ", &next); } } static int read_and_parse_trace_clock(struct tracecmd_input *handle, struct tep_handle *pevent) { unsigned long long size; char *trace_clock; if (read_data_and_size(handle, &trace_clock, &size) < 0) return -1; trace_clock[size] = 0; tracecmd_parse_trace_clock(handle, trace_clock, size); free(trace_clock); return 0; } static int init_data_v6(struct tracecmd_input *handle) { struct tep_handle *pevent = handle->pevent; int ret; ret = read_cpu_data(handle); if (ret < 0) return ret; if (handle->use_trace_clock) { /* * There was a bug in the original setting of * the trace_clock file which let it get * corrupted. If it fails to read, force local * clock. */ if (read_and_parse_trace_clock(handle, pevent) < 0) { char clock[] = "[local]"; tracecmd_warning("File has trace_clock bug, using local clock"); tracecmd_parse_trace_clock(handle, clock, 8); } } return ret; } static int init_data(struct tracecmd_input *handle) { return init_buffer_cpu_data(handle, &handle->top_buffer); } /** * tracecmd_init_data - prepare reading the data from trace.dat * @handle: input handle for the trace.dat file * * This prepares reading the data from trace.dat. This is called * after tracecmd_read_headers() and before tracecmd_read_data(). 
*/ int tracecmd_init_data(struct tracecmd_input *handle) { int ret; if (!HAS_SECTIONS(handle)) ret = init_data_v6(handle); else ret = init_data(handle); tracecmd_blk_hack(handle); return ret; } /** * tracecmd_make_pipe - Have the handle read a pipe instead of a file * @handle: input handle to read from a pipe * @cpu: the cpu that the pipe represents * @fd: the read end of the pipe * @cpus: the total number of cpus for this handle * * In order to stream data from the binary trace files and produce * output or analyze the data, a tracecmd_input descriptor needs to * be created, and then converted into a form that can act on a * pipe. * * Note, there are limitations to what this descriptor can do. * Most notibly, it can not read backwards. Once a page is read * it can not be read at a later time (except if a record is attached * to it and is holding the page ref). * * It is expected that the handle has already been created and * tracecmd_read_headers() has run on it. */ int tracecmd_make_pipe(struct tracecmd_input *handle, int cpu, int fd, int cpus) { enum kbuffer_long_size long_size; enum kbuffer_endian endian; handle->read_page = true; handle->use_pipe = true; if (!handle->cpus) { handle->cpus = cpus; handle->cpu_data = malloc(sizeof(*handle->cpu_data) * handle->cpus); if (!handle->cpu_data) return -1; } if (cpu >= handle->cpus) return -1; if (handle->long_size == 8) long_size = KBUFFER_LSIZE_8; else long_size = KBUFFER_LSIZE_4; if (tep_is_file_bigendian(handle->pevent)) endian = KBUFFER_ENDIAN_BIG; else endian = KBUFFER_ENDIAN_LITTLE; memset(&handle->cpu_data[cpu], 0, sizeof(handle->cpu_data[cpu])); handle->cpu_data[cpu].pipe_fd = fd; handle->cpu_data[cpu].cpu = cpu; handle->cpu_data[cpu].kbuf = kbuffer_alloc(long_size, endian); if (!handle->cpu_data[cpu].kbuf) return -1; if (tep_is_old_format(handle->pevent)) kbuffer_set_old_format(handle->cpu_data[cpu].kbuf); handle->cpu_data[cpu].file_offset = 0; handle->cpu_data[cpu].file_size = -1; init_cpu(handle, cpu); 
return 0;
}

/**
 * tracecmd_print_events - print the events that are stored in trace.dat
 * @handle: input handle for the trace.dat file
 * @regex: regex of events to print (NULL is all events)
 *
 * This is a debugging routine to print out the events that
 * are stored in a given trace.dat file.
 */
void tracecmd_print_events(struct tracecmd_input *handle, const char *regex)
{
	/* No regex given: match every event */
	if (!regex)
		regex = ".*";

	if (!HAS_SECTIONS(handle))
		read_headers_v6(handle, TRACECMD_FILE_ALL_EVENTS, regex);

	read_headers(handle, regex);
}

/* Show the cpu data stats: file offset and size of each CPU's data */
static void show_cpu_stats(struct tracecmd_input *handle)
{
	struct cpu_data *cpu_data;
	int i;

	for (i = 0; i < handle->cpus; i++) {
		cpu_data = &handle->cpu_data[i];
		printf("CPU%d data recorded at offset=0x%llx\n",
		       i, cpu_data->file_offset);
		printf(" %lld bytes in size\n", cpu_data->file_size);
	}
}

/**
 * tracecmd_print_stats - prints the stats recorded in the options.
 * @handle: input handle for the trace.dat file
 *
 * Looks for the option TRACECMD_OPTION_CPUSTAT and prints out what's
 * stored there, if it is found. Otherwise it prints that none were found.
 * The offset and size of each CPU's data are printed in both cases.
 */
void tracecmd_print_stats(struct tracecmd_input *handle)
{
	if (handle->cpustats)
		printf("%s\n", handle->cpustats);
	else
		printf(" No stats in this file\n");

	show_cpu_stats(handle);
}

/**
 * tracecmd_print_uname - prints the recorded uname if it was recorded
 * @handle: input handle for the trace.dat file
 *
 * Looks for the option TRACECMD_OPTION_UNAME and prints out what's
 * stored there, if it is found. Otherwise it prints that none were found.
 */
void tracecmd_print_uname(struct tracecmd_input *handle)
{
	if (handle->uname)
		printf("%s\n", handle->uname);
	else
		printf(" uname was not recorded in this file\n");
}

/**
 * tracecmd_print_version - prints the recorded version if it was recorded
 * @handle: input handle for the trace.dat file
 *
 * Looks for the option TRACECMD_OPTION_VERSION and prints out what's
 * stored there, if it is found. Otherwise it prints that none were found.
*/ void tracecmd_print_version(struct tracecmd_input *handle) { if (handle->version) printf("%s\n", handle->version); else printf(" version was not recorded in this file\n"); } /** * tracecmd_hooks - return the event hooks that were used in record * @handle: input handle for the trace.dat file * * If trace-cmd record used -H to save hooks, they are parsed and * presented as hooks here. * * Returns the hook list (do not free it, they are freed on close) */ struct hook_list *tracecmd_hooks(struct tracecmd_input *handle) { return handle->hooks; } static int init_metadata_strings(struct tracecmd_input *handle, int size) { char *tmp; tmp = realloc(handle->strings, handle->strings_size + size); if (!tmp) return -1; handle->strings = tmp; if (do_read_check(handle, handle->strings + handle->strings_size, size)) return -1; handle->strings_size += size; return 0; } static int read_metadata_strings(struct tracecmd_input *handle) { unsigned short flags; int found = 0; unsigned short id; unsigned int csize, rsize; unsigned long long size; off_t offset; offset = lseek(handle->fd, 0, SEEK_CUR); do { if (read_section_header(handle, &id, &flags, &size, NULL)) break; if (id == TRACECMD_OPTION_STRINGS) { found++; if ((flags & TRACECMD_SEC_FL_COMPRESS)) { read4(handle, &csize); read4(handle, &rsize); do_lseek(handle, -8, SEEK_CUR); if (in_uncompress_block(handle)) break; } else { rsize = size; } init_metadata_strings(handle, rsize); if (flags & TRACECMD_SEC_FL_COMPRESS) in_uncompress_reset(handle); } else { if (lseek(handle->fd, size, SEEK_CUR) == (off_t)-1) break; } } while (1); if (lseek(handle->fd, offset, SEEK_SET) == (off_t)-1) return -1; return found ? 0 : -1; } /** * tracecmd_alloc_fd - create a tracecmd_input handle from a file descriptor * @fd: the file descriptor for the trace.dat file * @flags: bitmask of enum tracecmd_open_flags * * Allocate a tracecmd_input handle from a file descriptor and open the * file. 
This tests if the file is of trace-cmd format and allocates * a parse event descriptor. * * The returned pointer is not ready to be read yet. A tracecmd_read_headers() * and tracecmd_init_data() still need to be called on the descriptor. * * Unless you know what you are doing with this, you want to use * tracecmd_open_fd() instead. */ struct tracecmd_input *tracecmd_alloc_fd(int fd, int flags) { struct tracecmd_input *handle; char test[] = TRACECMD_MAGIC; unsigned int page_size; size_t offset; char *version = NULL; char *zver = NULL; char *zname = NULL; char buf[BUFSIZ]; unsigned long ver; handle = malloc(sizeof(*handle)); if (!handle) return NULL; memset(handle, 0, sizeof(*handle)); handle->fd = fd; handle->ref = 1; handle->latz.fd = -1; /* By default, use usecs, unless told otherwise */ handle->flags |= TRACECMD_FL_IN_USECS; #ifdef INMEMORY_DECOMPRESS handle->read_zpage = 1; #endif if (do_read_check(handle, buf, 3)) goto failed_read; if (memcmp(buf, test, 3) != 0) goto failed_read; if (do_read_check(handle, buf, 7)) goto failed_read; if (memcmp(buf, "tracing", 7) != 0) goto failed_read; version = read_string(handle); if (!version) goto failed_read; tracecmd_info("version = %s", version); ver = strtol(version, NULL, 10); if (!ver && errno) goto failed_read; if (!tracecmd_is_version_supported(ver)) { tracecmd_warning("Unsupported file version %lu", ver); goto failed_read; } handle->file_version = ver; free(version); version = NULL; if (handle->file_version >= FILE_VERSION_SECTIONS) handle->flags |= TRACECMD_FL_SECTIONED; if (handle->file_version >= FILE_VERSION_COMPRESSION) handle->flags |= TRACECMD_FL_COMPRESSION; if (do_read_check(handle, buf, 1)) goto failed_read; handle->pevent = tep_alloc(); if (!handle->pevent) goto failed_read; /* register default ftrace functions first */ if (!(flags & TRACECMD_FL_LOAD_NO_PLUGINS) && !(flags & TRACECMD_FL_LOAD_NO_SYSTEM_PLUGINS)) tracecmd_ftrace_overrides(handle, &handle->finfo); handle->plugin_list = 
trace_load_plugins(handle->pevent, flags); tep_set_file_bigendian(handle->pevent, buf[0]); tep_set_local_bigendian(handle->pevent, tracecmd_host_bigendian()); do_read_check(handle, buf, 1); handle->long_size = buf[0]; tep_set_long_size(handle->pevent, handle->long_size); read4(handle, &page_size); handle->page_size = page_size; handle->next_offset = page_size; offset = lseek(handle->fd, 0, SEEK_CUR); handle->total_file_size = lseek(handle->fd, 0, SEEK_END); lseek(handle->fd, offset, SEEK_SET); if (HAS_COMPRESSION(handle)) { zname = read_string(handle); if (!zname) goto failed_read; zver = read_string(handle); if (!zver) goto failed_read; if (strcmp(zname, "none") == 0) { handle->read_zpage = false; handle->flags &= ~TRACECMD_FL_COMPRESSION; } else { handle->compress = tracecmd_compress_alloc(zname, zver, handle->fd, handle->pevent, NULL); if (!handle->compress) { tracecmd_warning("Unsupported file compression %s %s", zname, zver); goto failed_read; } } free(zname); free(zver); } if (HAS_SECTIONS(handle)) { if (read8(handle, &(handle->options_start))) { tracecmd_warning("Filed to read the offset of the first option section"); goto failed_read; } read_metadata_strings(handle); } handle->file_state = TRACECMD_FILE_INIT; return handle; failed_read: free(version); free(zname); free(zver); free(handle); return NULL; } /** * tracecmd_alloc_fd - create a tracecmd_input handle from a file name * @file: the file name of the file that is of tracecmd data type. * @flags: bitmask of enum tracecmd_open_flags * * Allocate a tracecmd_input handle from a given file name and open the * file. This tests if the file is of trace-cmd format and allocates * a parse event descriptor. * * The returned pointer is not ready to be read yet. A tracecmd_read_headers() * and tracecmd_init_data() still need to be called on the descriptor. * * Unless you know what you are doing with this, you want to use * tracecmd_open() instead. 
*/ struct tracecmd_input *tracecmd_alloc(const char *file, int flags) { int fd; fd = open(file, O_RDONLY); if (fd < 0) return NULL; return tracecmd_alloc_fd(fd, flags); } /** * tracecmd_open_fd - create a tracecmd_handle from the trace.dat file descriptor * @fd: the file descriptor for the trace.dat file * @flags: bitmask of enum tracecmd_open_flags */ struct tracecmd_input *tracecmd_open_fd(int fd, int flags) { struct tracecmd_input *handle; int ret; handle = tracecmd_alloc_fd(fd, flags); if (!handle) return NULL; if (tracecmd_read_headers(handle, 0) < 0) goto fail; if ((ret = tracecmd_init_data(handle)) < 0) goto fail; return handle; fail: tracecmd_close(handle); return NULL; } /** * tracecmd_open - create a tracecmd_handle from a given file * @file: the file name of the file that is of tracecmd data type. * @flags: bitmask of enum tracecmd_open_flags */ struct tracecmd_input *tracecmd_open(const char *file, int flags) { int fd; fd = open(file, O_RDONLY); if (fd < 0) return NULL; return tracecmd_open_fd(fd, flags); } /** * tracecmd_open_head - create a tracecmd_handle from a given file, read * and parse only the trace headers from the file * @file: the file name of the file that is of tracecmd data type. * @flags: bitmask of enum tracecmd_open_flags */ struct tracecmd_input *tracecmd_open_head(const char *file, int flags) { struct tracecmd_input *handle; int fd; fd = open(file, O_RDONLY); if (fd < 0) return NULL; handle = tracecmd_alloc_fd(fd, flags); if (!handle) return NULL; if (tracecmd_read_headers(handle, 0) < 0) goto fail; return handle; fail: tracecmd_close(handle); return NULL; } /** * tracecmd_ref - add a reference to the handle * @handle: input handle for the trace.dat file * * Some applications may share a handle between parts of * the application. Let those parts add reference counters * to the handle, and the last one to close it will free it. 
*/
void tracecmd_ref(struct tracecmd_input *handle)
{
	/* A NULL handle is silently ignored */
	if (!handle)
		return;

	handle->ref++;
}

/* Free the name, clock and cpu_data pointers held by a buffer instance descriptor */
static inline void free_buffer(struct input_buffer_instance *buf)
{
	free(buf->name);
	free(buf->clock);
	free(buf->cpu_data);
}

/**
 * tracecmd_close - close and free the trace.dat handle
 * @handle: input handle for the trace.dat file
 *
 * Close the file descriptor of the handle and frees
 * the resources allocated by the handle.
 *
 * A NULL @handle is ignored. The handle is reference counted: each call
 * drops one reference and only the call that drops the last one does the
 * actual tear down. A handle flagged TRACECMD_FL_BUFFER_INSTANCE drops a
 * reference on its parent instead of freeing the plugins, the tep handle
 * and the compression context itself.
 */
void tracecmd_close(struct tracecmd_input *handle)
{
	struct zchunk_cache *cache;
	struct file_section *del_sec;
	struct cpu_data *cpu_data;
	struct page_map *page_map, *n;
	int cpu;
	int i;

	if (!handle)
		return;

	if (handle->ref <= 0) {
		tracecmd_warning("tracecmd: bad ref count on handle");
		return;
	}

	/* Somebody else still holds a reference */
	if (--handle->ref)
		return;

	for (cpu = 0; cpu < handle->cpus; cpu++) {
		/* The tracecmd_peek_data may have cached a record */
		free_next(handle, cpu);
		free_page(handle, cpu);
		if (handle->cpu_data) {
			cpu_data = &handle->cpu_data[cpu];
			if (cpu_data->kbuf) {
				kbuffer_free(cpu_data->kbuf);
				if (cpu_data->page_map)
					free_page_map(cpu_data->page_map);

				/* Pages still counted at this point are only reported */
				if (cpu_data->page_cnt)
					tracecmd_warning("%d pages still allocated on cpu %d%s",
							 cpu_data->page_cnt, cpu,
							 show_records(cpu_data->pages,
								      cpu_data->nr_pages));
				free(cpu_data->pages);
			}
			/* A valid compress.fd refers to the file named compress.file, remove it */
			if (cpu_data->compress.fd >= 0) {
				close(cpu_data->compress.fd);
				unlink(cpu_data->compress.file);
			}
			/* Pop and free every cached chunk */
			while (cpu_data->compress.cache.node) {
				struct trace_rbtree_node *node;
				node = trace_rbtree_pop_nobalance(&cpu_data->compress.cache);
				cache = container_of(node, struct zchunk_cache, node);
				free(cache->map);
				free(cache);
			}
			free(cpu_data->compress.chunks);
			list_for_each_entry_safe(page_map, n, &cpu_data->page_maps, list) {
				list_del(&page_map->list);
				free(page_map);
			}
		}
	}

	free(handle->cpustats);
	free(handle->cpu_data);
	free(handle->uname);
	free(handle->trace_clock);
	free(handle->strings);
	free(handle->version);
	free(handle->followers);
	free(handle->missed_followers);
	trace_guest_map_free(handle->map);
	close(handle->fd);
	free(handle->latz.chunks);
	/* Same as the per CPU files above: close and remove latz.file */
	if (handle->latz.fd >= 0) {
		close(handle->latz.fd);
		unlink(handle->latz.file);
	}
	/* Free the singly linked list of sections */
	while (handle->sections) {
		del_sec = handle->sections;
		handle->sections = handle->sections->next;
		free(del_sec);
	}

	free_buffer(&handle->top_buffer);
	for (i = 0; i < handle->nr_buffers; i++)
		free_buffer(&handle->buffers[i]);
	free(handle->buffers);

	tracecmd_free_hooks(handle->hooks);
	handle->hooks = NULL;

	trace_pid_map_free(handle->pid_maps);
	handle->pid_maps = NULL;

	trace_tsync_offset_free(&handle->host);
	trace_guests_free(handle);

	tracecmd_filter_free(handle->filter);

	if (handle->flags & TRACECMD_FL_BUFFER_INSTANCE)
		tracecmd_close(handle->parent);
	else {
		/* Only main handle frees plugins, pevent and compression context */
		tracecmd_compress_destroy(handle->compress);
		tep_unload_plugins(handle->plugin_list, handle->pevent);
		tep_free(handle->pevent);
	}
	free(handle);
}

/*
 * Read an 8 byte size from @in_handle and write the same raw bytes to
 * @out_handle. On success *@size holds the value converted with
 * tep_read_number().
 *
 * Returns 0 on success, -1 if the read or the write fails.
 */
static int read_copy_size8(struct tracecmd_input *in_handle,
			   struct tracecmd_output *out_handle,
			   unsigned long long *size)
{
	/* read size */
	if (do_read_check(in_handle, size, 8))
		return -1;

	/* The bytes are written as they were read, before the conversion below */
	if (do_write_check(out_handle, size, 8))
		return -1;

	*size = tep_read_number(in_handle->pevent, size, 8);
	return 0;
}

/*
 * Read a 4 byte size from @in_handle and write the same raw bytes to
 * @out_handle. On success *@size holds the value converted with
 * tep_read_number().
 *
 * Returns 0 on success, -1 if the read or the write fails.
 */
static int read_copy_size4(struct tracecmd_input *in_handle, struct tracecmd_output *out_handle,
			   unsigned int *size)
{
	/* read size */
	if (do_read_check(in_handle, size, 4))
		return -1;

	/* The bytes are written as they were read, before the conversion below */
	if (do_write_check(out_handle, size, 4))
		return -1;

	*size = tep_read_number(in_handle->pevent, size, 4);
	return 0;
}

/*
 * Copy @size bytes from @in_handle to @out_handle through a temporary
 * buffer of @size bytes.
 *
 * Returns 0 on success, -1 if the buffer can not be allocated or the read
 * or the write fails.
 */
static int read_copy_data(struct tracecmd_input *in_handle,
			  unsigned long long size,
			  struct tracecmd_output *out_handle)
{
	char *buf;

	buf = malloc(size);
	if (!buf)
		return -1;
	if (do_read_check(in_handle, buf, size))
		goto failed_read;

	if (do_write_check(out_handle, buf, size))
		goto failed_read;

	free(buf);

	return 0;

failed_read:
	free(buf);
	return -1;
}

/* Ask check_file_state() if the input handle may move on to @new_state */
static bool check_in_state(struct tracecmd_input *handle, int new_state)
{
	return check_file_state(handle->file_version, handle->file_state, new_state);
}

static int
copy_header_files(struct tracecmd_input *in_handle, struct tracecmd_output *out_handle) { bool compress = out_check_compression(out_handle); struct file_section *sec; unsigned long long offset; unsigned long long size; if (!check_in_state(in_handle, TRACECMD_FILE_HEADERS) || !check_out_state(out_handle, TRACECMD_FILE_HEADERS)) return -1; sec = section_open(in_handle, TRACECMD_OPTION_HEADER_INFO); if (!sec) return -1; offset = out_write_section_header(out_handle, TRACECMD_OPTION_HEADER_INFO, "headers", TRACECMD_SEC_FL_COMPRESS, true); out_compression_start(out_handle, compress); /* "header_page" */ if (read_copy_data(in_handle, 12, out_handle) < 0) goto error; if (read_copy_size8(in_handle, out_handle, &size) < 0) goto error; if (read_copy_data(in_handle, size, out_handle) < 0) goto error; /* "header_event" */ if (read_copy_data(in_handle, 13, out_handle) < 0) goto error; if (read_copy_size8(in_handle, out_handle, &size) < 0) goto error; if (read_copy_data(in_handle, size, out_handle) < 0) goto error; in_handle->file_state = TRACECMD_FILE_HEADERS; if (out_compression_end(out_handle, compress)) goto error; out_set_file_state(out_handle, in_handle->file_state); section_close(in_handle, sec); if (out_update_section_header(out_handle, offset)) goto error; return 0; error: out_compression_reset(out_handle, compress); section_close(in_handle, sec); return -1; } static int copy_ftrace_files(struct tracecmd_input *in_handle, struct tracecmd_output *out_handle) { bool compress = out_check_compression(out_handle); struct file_section *sec; unsigned long long offset; unsigned long long size; unsigned int count; unsigned int i; if (!check_in_state(in_handle, TRACECMD_FILE_FTRACE_EVENTS) || !check_out_state(out_handle, TRACECMD_FILE_FTRACE_EVENTS)) return -1; sec = section_open(in_handle, TRACECMD_OPTION_FTRACE_EVENTS); if (!sec) return -1; offset = out_write_section_header(out_handle, TRACECMD_OPTION_FTRACE_EVENTS, "ftrace events", TRACECMD_SEC_FL_COMPRESS, true); 
out_compression_start(out_handle, compress); if (read_copy_size4(in_handle, out_handle, &count) < 0) goto error; for (i = 0; i < count; i++) { if (read_copy_size8(in_handle, out_handle, &size) < 0) goto error; if (read_copy_data(in_handle, size, out_handle) < 0) goto error; } in_handle->file_state = TRACECMD_FILE_FTRACE_EVENTS; if (out_compression_end(out_handle, compress)) goto error; out_set_file_state(out_handle, in_handle->file_state); section_close(in_handle, sec); if (out_update_section_header(out_handle, offset)) goto error; return 0; error: out_compression_reset(out_handle, compress); section_close(in_handle, sec); return -1; } static int copy_event_files(struct tracecmd_input *in_handle, struct tracecmd_output *out_handle) { bool compress = out_check_compression(out_handle); struct file_section *sec; unsigned long long offset; unsigned long long size; char *system; unsigned int systems; unsigned int count; unsigned int i,x; if (!check_in_state(in_handle, TRACECMD_FILE_ALL_EVENTS) || !check_out_state(out_handle, TRACECMD_FILE_ALL_EVENTS)) return -1; sec = section_open(in_handle, TRACECMD_OPTION_EVENT_FORMATS); if (!sec) return -1; offset = out_write_section_header(out_handle, TRACECMD_OPTION_EVENT_FORMATS, "events format", TRACECMD_SEC_FL_COMPRESS, true); out_compression_start(out_handle, compress); if (read_copy_size4(in_handle, out_handle, &systems) < 0) goto error; for (i = 0; i < systems; i++) { system = read_string(in_handle); if (!system) goto error; if (do_write_check(out_handle, system, strlen(system) + 1)) { free(system); goto error; } free(system); if (read_copy_size4(in_handle, out_handle, &count) < 0) goto error; for (x=0; x < count; x++) { if (read_copy_size8(in_handle, out_handle, &size) < 0) goto error; if (read_copy_data(in_handle, size, out_handle) < 0) goto error; } } in_handle->file_state = TRACECMD_FILE_ALL_EVENTS; if (out_compression_end(out_handle, compress)) goto error; out_set_file_state(out_handle, in_handle->file_state); 
section_close(in_handle, sec); if (out_update_section_header(out_handle, offset)) goto error; return 0; error: out_compression_reset(out_handle, compress); section_close(in_handle, sec); return -1; } static int copy_proc_kallsyms(struct tracecmd_input *in_handle, struct tracecmd_output *out_handle) { bool compress = out_check_compression(out_handle); struct file_section *sec; unsigned long long offset; unsigned int size; if (!check_in_state(in_handle, TRACECMD_FILE_KALLSYMS) || !check_out_state(out_handle, TRACECMD_FILE_KALLSYMS)) return -1; sec = section_open(in_handle, TRACECMD_OPTION_KALLSYMS); if (!sec) return -1; offset = out_write_section_header(out_handle, TRACECMD_OPTION_KALLSYMS, "kallsyms", TRACECMD_SEC_FL_COMPRESS, true); out_compression_start(out_handle, compress); if (read_copy_size4(in_handle, out_handle, &size) < 0) goto error; if (!size) goto out; /* OK? */ if (read_copy_data(in_handle, size, out_handle) < 0) goto error; out: in_handle->file_state = TRACECMD_FILE_KALLSYMS; if (out_compression_end(out_handle, compress)) goto error; out_set_file_state(out_handle, in_handle->file_state); section_close(in_handle, sec); if (out_update_section_header(out_handle, offset)) goto error; return 0; error: out_compression_reset(out_handle, compress); section_close(in_handle, sec); return -1; } static int copy_ftrace_printk(struct tracecmd_input *in_handle, struct tracecmd_output *out_handle) { bool compress = out_check_compression(out_handle); struct file_section *sec; unsigned long long offset; unsigned int size; if (!check_in_state(in_handle, TRACECMD_FILE_PRINTK) || !check_out_state(out_handle, TRACECMD_FILE_PRINTK)) return -1; sec = section_open(in_handle, TRACECMD_OPTION_PRINTK); if (!sec) return -1; offset = out_write_section_header(out_handle, TRACECMD_OPTION_PRINTK, "printk", TRACECMD_SEC_FL_COMPRESS, true); out_compression_start(out_handle, compress); if (read_copy_size4(in_handle, out_handle, &size) < 0) goto error; if (!size) goto out; /* OK? 
*/ if (read_copy_data(in_handle, size, out_handle) < 0) goto error; out: in_handle->file_state = TRACECMD_FILE_PRINTK; if (out_compression_end(out_handle, compress)) goto error; out_set_file_state(out_handle, in_handle->file_state); section_close(in_handle, sec); if (out_update_section_header(out_handle, offset)) goto error; return 0; error: out_compression_reset(out_handle, compress); section_close(in_handle, sec); return -1; } static int copy_command_lines(struct tracecmd_input *in_handle, struct tracecmd_output *out_handle) { bool compress = out_check_compression(out_handle); struct file_section *sec; unsigned long long offset; unsigned long long size; if (!check_in_state(in_handle, TRACECMD_FILE_CMD_LINES) || !check_out_state(out_handle, TRACECMD_FILE_CMD_LINES)) return -1; sec = section_open(in_handle, TRACECMD_OPTION_CMDLINES); if (!sec) return -1; offset = out_write_section_header(out_handle, TRACECMD_OPTION_CMDLINES, "command lines", TRACECMD_SEC_FL_COMPRESS, true); out_compression_start(out_handle, compress); if (read_copy_size8(in_handle, out_handle, &size) < 0) goto error; if (!size) goto out; /* OK? 
*/ if (read_copy_data(in_handle, size, out_handle) < 0) goto error; out: in_handle->file_state = TRACECMD_FILE_CMD_LINES; if (out_compression_end(out_handle, compress)) goto error; out_set_file_state(out_handle, in_handle->file_state); section_close(in_handle, sec); if (out_update_section_header(out_handle, offset)) goto error; return 0; error: out_compression_reset(out_handle, compress); section_close(in_handle, sec); return -1; } static int copy_cpu_count(struct tracecmd_input *in_handle, struct tracecmd_output *out_handle) { unsigned int cpus; if (!check_in_state(in_handle, TRACECMD_FILE_CPU_COUNT) || !check_out_state(out_handle, TRACECMD_FILE_CPU_COUNT)) return -1; if (!HAS_SECTIONS(in_handle)) { if (read4(in_handle, &cpus)) return -1; } else { cpus = in_handle->max_cpu; } if (tracecmd_get_out_file_version(out_handle) < FILE_VERSION_SECTIONS) { cpus = tep_read_number(in_handle->pevent, &cpus, 4); if (do_write_check(out_handle, &cpus, 4)) return -1; } else { tracecmd_add_option(out_handle, TRACECMD_OPTION_CPUCOUNT, sizeof(int), &cpus); } in_handle->file_state = TRACECMD_FILE_CPU_COUNT; out_set_file_state(out_handle, in_handle->file_state); return 0; } /** * tracecmd_copy_headers - Copy headers from a tracecmd_input handle to a file descriptor * @in_handle: input handle for the trace.dat file to copy from. * @out_handle: output handle to the trace.dat file to copy to. * @start_state: The file state to start copying from (zero for the beginnig) * @end_state: The file state to stop at (zero for up to cmdlines) * * This is used to copy trace header data of a trace.dat file to a * file descriptor. Using @start_state and @end_state it may be used * multiple times against the input handle. * * NOTE: The input handle is also modified, and ends at the end * state as well. 
*/ int tracecmd_copy_headers(struct tracecmd_input *in_handle, struct tracecmd_output *out_handle, enum tracecmd_file_states start_state, enum tracecmd_file_states end_state) { struct file_section *sec = NULL; int ret; if (!start_state) start_state = TRACECMD_FILE_HEADERS; if (!end_state) end_state = TRACECMD_FILE_CMD_LINES; if (start_state > end_state) return -1; if (end_state < TRACECMD_FILE_HEADERS) return 0; if (in_handle->file_state >= start_state) { /* Set the handle to just before the start state */ sec = section_open(in_handle, TRACECMD_OPTION_HEADER_INFO); if (!sec) return -1; /* Now that the file handle has moved, change its state */ in_handle->file_state = TRACECMD_FILE_INIT; } /* Try to bring the input up to the start state - 1 */ ret = tracecmd_read_headers(in_handle, start_state - 1); if (sec) section_close(in_handle, sec); if (ret < 0) goto out; switch (start_state) { case TRACECMD_FILE_HEADERS: ret = copy_header_files(in_handle, out_handle); if (ret < 0) goto out; /* fallthrough */ case TRACECMD_FILE_FTRACE_EVENTS: /* handle's state is now updating with the copies */ if (end_state <= in_handle->file_state) return 0; ret = copy_ftrace_files(in_handle, out_handle); if (ret < 0) goto out; /* fallthrough */ case TRACECMD_FILE_ALL_EVENTS: if (end_state <= in_handle->file_state) return 0; ret = copy_event_files(in_handle, out_handle); if (ret < 0) goto out; /* fallthrough */ case TRACECMD_FILE_KALLSYMS: if (end_state <= in_handle->file_state) return 0; ret = copy_proc_kallsyms(in_handle, out_handle); if (ret < 0) goto out; /* fallthrough */ case TRACECMD_FILE_PRINTK: if (end_state <= in_handle->file_state) return 0; ret = copy_ftrace_printk(in_handle, out_handle); if (ret < 0) goto out; /* fallthrough */ case TRACECMD_FILE_CMD_LINES: if (end_state <= in_handle->file_state) return 0; /* Optional */ copy_command_lines(in_handle, out_handle); /* fallthrough */ case TRACECMD_FILE_CPU_COUNT: if (end_state <= in_handle->file_state) return 0; ret = 
copy_cpu_count(in_handle, out_handle); if (ret < 0) goto out; /* fallthrough */ default: break; } out: return ret < 0 ? -1 : 0; } int tracecmd_copy_buffer_descr(struct tracecmd_input *in_handle, struct tracecmd_output *out_handle) { int i; if (tracecmd_get_out_file_version(out_handle) >= FILE_VERSION_SECTIONS) return 0; for (i = 0; i < in_handle->nr_buffers; i++) tracecmd_add_buffer_info(out_handle, in_handle->buffers[i].name, 0); return tracecmd_write_buffer_info(out_handle); } static int copy_options_recursive(struct tracecmd_input *in_handle, struct tracecmd_output *out_handle) { unsigned short id, flags = 0; unsigned short option, en2; unsigned long long next; unsigned int size, en4; bool skip; for (;;) { if (do_read_check(in_handle, &option, 2)) return -1; en2 = tep_read_number(in_handle->pevent, &option, 2); if (en2 == TRACECMD_OPTION_DONE && !HAS_SECTIONS(in_handle)) return 0; /* next 4 bytes is the size of the option */ if (do_read_check(in_handle, &size, 4)) return -1; en4 = tep_read_number(in_handle->pevent, &size, 4); if (en2 == TRACECMD_OPTION_DONE) { /* option done v7 */ if (en4 < 8) return -1; if (read8(in_handle, &next)) return -1; if (!next) break; if (do_lseek(in_handle, next, SEEK_SET) == (off_t)-1) return -1; if (read_section_header(in_handle, &id, &flags, NULL, NULL)) return -1; if (id != TRACECMD_OPTION_DONE) return -1; if (flags & TRACECMD_SEC_FL_COMPRESS && in_uncompress_block(in_handle)) return -1; return copy_options_recursive(in_handle, out_handle); } /* Do not copy these, as they have file specific offsets */ switch (en2) { case TRACECMD_OPTION_BUFFER: case TRACECMD_OPTION_BUFFER_TEXT: case TRACECMD_OPTION_HEADER_INFO: case TRACECMD_OPTION_FTRACE_EVENTS: case TRACECMD_OPTION_EVENT_FORMATS: case TRACECMD_OPTION_KALLSYMS: case TRACECMD_OPTION_PRINTK: case TRACECMD_OPTION_CMDLINES: skip = true; break; default: skip = false; break; } if (skip) { do_lseek(in_handle, en4, SEEK_CUR); continue; } if (do_write_check(out_handle, &option, 2)) return 
-1; if (do_write_check(out_handle, &size, 4)) return -1; if (read_copy_data(in_handle, en4, out_handle)) return -1; } return 0; } static int copy_options(struct tracecmd_input *in_handle, struct tracecmd_output *out_handle) { unsigned long long offset, start; unsigned short id, en2, flags = 0; int tmp; if (HAS_SECTIONS(in_handle)) { if (read_section_header(in_handle, &id, &flags, NULL, NULL)) return -1; if (id != TRACECMD_OPTION_DONE) return -1; if (flags & TRACECMD_SEC_FL_COMPRESS && in_uncompress_block(in_handle)) return -1; } start = tracecmd_get_out_file_offset(out_handle); if (tracecmd_get_out_file_version(out_handle) < FILE_VERSION_SECTIONS) { if (do_write_check(out_handle, "options ", 10)) return -1; } offset = out_write_section_header(out_handle, TRACECMD_OPTION_DONE, "options", 0, false); if (copy_options_recursive(in_handle, out_handle)) goto error; id = TRACECMD_OPTION_DONE; en2 = tep_read_number(in_handle->pevent, &id, 2); if (do_write_check(out_handle, &en2, 2)) goto error; if (tracecmd_get_out_file_version(out_handle) < FILE_VERSION_SECTIONS) { out_save_options_offset(out_handle, start); } else { tmp = 8; if (do_write_check(out_handle, &tmp, 4)) goto error; out_save_options_offset(out_handle, start); start = 0; if (do_write_check(out_handle, &start, 8)) goto error; } out_update_section_header(out_handle, offset); if (flags & TRACECMD_SEC_FL_COMPRESS) in_uncompress_reset(in_handle); in_handle->file_state = TRACECMD_FILE_OPTIONS; out_set_file_state(out_handle, in_handle->file_state); /* Append local options */ return tracecmd_append_options(out_handle); error: if (flags & TRACECMD_SEC_FL_COMPRESS) in_uncompress_reset(in_handle); return 0; } int tracecmd_copy_options(struct tracecmd_input *in_handle, struct tracecmd_output *out_handle) { if (!check_in_state(in_handle, TRACECMD_FILE_OPTIONS) || !check_out_state(out_handle, TRACECMD_FILE_OPTIONS)) return -1; if (!in_handle->options_start) return 0; if (lseek(in_handle->fd, in_handle->options_start, 
SEEK_SET) == (off_t)-1) return -1; if (copy_options(in_handle, out_handle) < 0) return -1; return 0; } static int copy_trace_latency(struct tracecmd_input *in_handle, struct tracecmd_output *out_handle, const char *buf_name) { int page_size = getpagesize(); unsigned long long wsize; unsigned long long offset; int fd; if (tracecmd_get_out_file_version(out_handle) < FILE_VERSION_SECTIONS && do_write_check(out_handle, "latency ", 10)) return -1; offset = tracecmd_get_out_file_offset(out_handle); if (tracecmd_get_out_file_version(out_handle) >= FILE_VERSION_SECTIONS && !out_add_buffer_option(out_handle, buf_name, TRACECMD_OPTION_BUFFER_TEXT, offset, 0, NULL, page_size)) return -1; offset = out_write_section_header(out_handle, TRACECMD_OPTION_BUFFER_TEXT, "buffer latency", TRACECMD_SEC_FL_COMPRESS, false); if (in_handle->latz.fd >= 0) fd = in_handle->latz.fd; else fd = in_handle->fd; if (!out_copy_fd_compress(out_handle, fd, 0, &wsize, page_size)) return -1; if (out_update_section_header(out_handle, offset)) return -1; out_set_file_state(out_handle, TRACECMD_FILE_CPU_LATENCY); return 0; } static int copy_trace_flyrecord_data(struct tracecmd_input *in_handle, struct tracecmd_output *out_handle, const char *buff_name) { struct cpu_data_source *data; int total_size = 0; int cpus; int ret; int i, j; if (tracecmd_get_out_file_version(out_handle) < FILE_VERSION_SECTIONS) cpus = in_handle->max_cpu; else cpus = in_handle->cpus; data = calloc(cpus, sizeof(struct cpu_data_source)); if (!data) return -1; for (i = 0; i < in_handle->cpus; i++) { j = in_handle->cpu_data[i].cpu; data[j].size = in_handle->cpu_data[i].file_size; total_size += data[j].size; if (in_handle->cpu_data[i].compress.fd >= 0) { data[j].fd = in_handle->cpu_data[i].compress.fd; data[j].offset = 0; } else { data[j].fd = in_handle->fd; data[j].offset = in_handle->cpu_data[i].file_offset; } } if (total_size || tracecmd_get_out_file_version(out_handle) < FILE_VERSION_SECTIONS) ret = out_write_cpu_data(out_handle, 
cpus, data, buff_name); else ret = 0; free(data); return ret; } static int copy_flyrecord_buffer(struct tracecmd_input *in_handle, struct tracecmd_output *out_handle, int index) { struct tracecmd_input *instance; const char *name; int ret; name = tracecmd_buffer_instance_name(in_handle, index); if (!name) return -1; instance = tracecmd_buffer_instance_handle(in_handle, index); if (!instance) return -1; if (!tracecmd_get_quiet(out_handle) && *name) fprintf(stderr, "\nBuffer: %s\n\n", name); if (in_handle->buffers[index].latency) ret = copy_trace_latency(in_handle, out_handle, name); else ret = copy_trace_flyrecord_data(instance, out_handle, name); tracecmd_close(instance); return ret; } static int copy_trace_data_from_v6(struct tracecmd_input *in_handle, struct tracecmd_output *out_handle) { char buf[10]; int ret; int i; if (do_read_check(in_handle, buf, 10)) return -1; if (strncmp(buf, "latency", 7) == 0) in_handle->file_state = TRACECMD_FILE_CPU_LATENCY; else if (strncmp(buf, "flyrecord", 9) == 0) in_handle->file_state = TRACECMD_FILE_CPU_FLYRECORD; tracecmd_init_data(in_handle); tracecmd_set_out_clock(out_handle, in_handle->trace_clock); if (in_handle->file_state == TRACECMD_FILE_CPU_LATENCY) return copy_trace_latency(in_handle, out_handle, ""); /* top instance */ ret = copy_trace_flyrecord_data(in_handle, out_handle, ""); if (ret) return ret; for (i = 0; i < in_handle->nr_buffers; i++) copy_flyrecord_buffer(in_handle, out_handle, i); return 0; } static int copy_trace_data_from_v7(struct tracecmd_input *in_handle, struct tracecmd_output *out_handle) { int ret; int i; /* Force using temporary files for trace data decompression */ in_handle->read_zpage = false; ret = tracecmd_init_data(in_handle); if (ret < 0) return ret; tracecmd_set_out_clock(out_handle, in_handle->trace_clock); /* copy top buffer */ if (in_handle->top_buffer.latency) ret = copy_trace_latency(in_handle, out_handle, in_handle->top_buffer.name); else if (in_handle->top_buffer.cpus) ret = 
copy_trace_flyrecord_data(in_handle, out_handle, in_handle->top_buffer.name); else if (tracecmd_get_out_file_version(out_handle) < FILE_VERSION_SECTIONS) ret = out_write_emty_cpu_data(out_handle, in_handle->max_cpu); if (ret) return ret; for (i = 0; i < in_handle->nr_buffers; i++) copy_flyrecord_buffer(in_handle, out_handle, i); return 0; } __hidden int tracecmd_copy_trace_data(struct tracecmd_input *in_handle, struct tracecmd_output *out_handle) { int ret; if (!check_in_state(in_handle, TRACECMD_FILE_CPU_FLYRECORD) || !check_out_state(out_handle, TRACECMD_FILE_CPU_FLYRECORD)) return -1; if (in_handle->file_version < FILE_VERSION_SECTIONS) ret = copy_trace_data_from_v6(in_handle, out_handle); else ret = copy_trace_data_from_v7(in_handle, out_handle); return ret; } /** * tracecmd_record_at_buffer_start - return true if record is first on subbuffer * @handle: input handle for the trace.dat file * @record: The record to test if it is the first record on page * * Returns true if the record is the first record on the page. 
*/ int tracecmd_record_at_buffer_start(struct tracecmd_input *handle, struct tep_record *record) { struct page *page = record->priv; struct kbuffer *kbuf = handle->cpu_data[record->cpu].kbuf; int offset; if (!page || !kbuf) return 0; offset = record->offset - page->offset; return offset == kbuffer_start_of_data(kbuf); } unsigned long long tracecmd_page_ts(struct tracecmd_input *handle, struct tep_record *record) { struct page *page = record->priv; struct kbuffer *kbuf = handle->cpu_data[record->cpu].kbuf; if (!page || !kbuf) return 0; return kbuffer_subbuf_timestamp(kbuf, page->map); } unsigned int tracecmd_record_ts_delta(struct tracecmd_input *handle, struct tep_record *record) { struct kbuffer *kbuf = handle->cpu_data[record->cpu].kbuf; struct page *page = record->priv; int offset; if (!page || !kbuf) return 0; offset = record->offset - page->offset; return kbuffer_ptr_delta(kbuf, page->map + offset); } struct kbuffer *tracecmd_record_kbuf(struct tracecmd_input *handle, struct tep_record *record) { return handle->cpu_data[record->cpu].kbuf; } void *tracecmd_record_page(struct tracecmd_input *handle, struct tep_record *record) { struct page *page = record->priv; return page ? 
page->map : NULL; } void *tracecmd_record_offset(struct tracecmd_input *handle, struct tep_record *record) { struct page *page = record->priv; int offset; if (!page) return NULL; offset = record->offset - page->offset; return page->map + offset; } int tracecmd_buffer_instances(struct tracecmd_input *handle) { return handle->nr_buffers; } const char *tracecmd_buffer_instance_name(struct tracecmd_input *handle, int indx) { if (indx >= handle->nr_buffers) return NULL; return handle->buffers[indx].name; } struct tracecmd_input * tracecmd_buffer_instance_handle(struct tracecmd_input *handle, int indx) { struct tracecmd_input *new_handle; struct input_buffer_instance *buffer = &handle->buffers[indx]; size_t offset; ssize_t ret; if (indx >= handle->nr_buffers) return NULL; /* * We make a copy of the current handle, but we substitute * the cpu data with the cpu data for this buffer. */ new_handle = malloc(sizeof(*handle)); if (!new_handle) return NULL; *new_handle = *handle; memset(&new_handle->top_buffer, 0, sizeof(new_handle->top_buffer)); new_handle->cpu_data = NULL; new_handle->nr_buffers = 0; new_handle->buffers = NULL; new_handle->version = NULL; new_handle->sections = NULL; new_handle->strings = NULL; new_handle->guest = NULL; new_handle->ref = 1; if (handle->trace_clock) { new_handle->trace_clock = strdup(handle->trace_clock); if (!new_handle->trace_clock) { free(new_handle); return NULL; } } memset(&new_handle->host, 0, sizeof(new_handle->host)); new_handle->parent = handle; new_handle->cpustats = NULL; new_handle->hooks = NULL; if (handle->uname) /* Ignore if fails to malloc, no biggy */ new_handle->uname = strdup(handle->uname); tracecmd_ref(handle); new_handle->fd = dup(handle->fd); new_handle->flags |= TRACECMD_FL_BUFFER_INSTANCE; new_handle->pid_maps = NULL; if (!HAS_SECTIONS(handle)) { /* Save where we currently are */ offset = lseek(handle->fd, 0, SEEK_CUR); ret = lseek(handle->fd, buffer->offset, SEEK_SET); if (ret == (off_t)-1) { tracecmd_warning("could 
not seek to buffer %s offset %ld", buffer->name, buffer->offset); goto error; } /* * read_options_type() is called right after the CPU count so update * file state accordingly. */ new_handle->file_state = TRACECMD_FILE_CPU_COUNT; ret = read_options_type(new_handle); if (!ret) ret = read_cpu_data(new_handle); if (ret < 0) { tracecmd_warning("failed to read sub buffer %s", buffer->name); goto error; } ret = lseek(handle->fd, offset, SEEK_SET); if (ret < 0) { tracecmd_warning("could not seek to back to offset %ld", offset); goto error; } } else { new_handle->page_size = handle->buffers[indx].page_size; if (init_buffer_cpu_data(new_handle, buffer) < 0) goto error; } return new_handle; error: tracecmd_close(new_handle); return NULL; } int tracecmd_is_buffer_instance(struct tracecmd_input *handle) { return handle->flags & TRACECMD_FL_BUFFER_INSTANCE; } /** * tracecmd_long_size - return the size of "long" for the arch * @handle: input handle for the trace.dat file */ int tracecmd_long_size(struct tracecmd_input *handle) { return handle->long_size; } /** * tracecmd_page_size - return the PAGE_SIZE for the arch * @handle: input handle for the trace.dat file */ int tracecmd_page_size(struct tracecmd_input *handle) { return handle->page_size; } /** * tracecmd_cpus - return the number of CPUs recorded * @handle: input handle for the trace.dat file */ int tracecmd_cpus(struct tracecmd_input *handle) { return handle->max_cpu; } /** * tracecmd_get_tep - return the tep handle * @handle: input handle for the trace.dat file */ struct tep_handle *tracecmd_get_tep(struct tracecmd_input *handle) { return handle->pevent; } /** * tracecmd_get_in_file_version - return the trace.dat file version * @handle: input handle for the trace.dat file */ unsigned long tracecmd_get_in_file_version(struct tracecmd_input *handle) { return handle->file_version; } /** * tracecmd_get_file_compress_proto - get name and version of compression algorithm * @handle: input handle for the trace.dat file * @name: 
return, name of the compression algorithm.
 * @version: return, version of the compression algorithm.
 *
 * Get the name and the version of the compression algorithm, used to
 * compress the file associated with @handle.
 * Returns 0 on success, or -1 in case of an error. If 0 is returned,
 * the name and version of the algorithm are stored in @name and @version.
 * The returned strings must *not* be freed.
 */
int tracecmd_get_file_compress_proto(struct tracecmd_input *handle,
				     const char **name, const char **version)
{
	return tracecmd_compress_proto_get_name(handle->compress, name, version);
}

/**
 * tracecmd_get_use_trace_clock - return use_trace_clock
 * @handle: input handle for the trace.dat file
 *
 * Returns the current value of the use_trace_clock field of @handle.
 */
bool tracecmd_get_use_trace_clock(struct tracecmd_input *handle)
{
	return handle->use_trace_clock;
}

/**
 * tracecmd_get_options_offset - get offset of the options sections in the file
 * @handle: input handle for the trace.dat file
 *
 * Returns the options_start value stored in @handle.
 */
size_t tracecmd_get_options_offset(struct tracecmd_input *handle)
{
	return handle->options_start;
}

/**
 * tracecmd_get_trace_clock - return the saved trace clock
 * @handle: input handle for the trace.dat file
 *
 * Returns a string of the clock that was saved in the trace.dat file.
 * The string should not be freed, as it points to the internal
 * structure data.
 */
const char *tracecmd_get_trace_clock(struct tracecmd_input *handle)
{
	return handle->trace_clock;
}

/**
 * tracecmd_get_tsc2nsec - get the calculation numbers to convert to nsecs
 * @handle: input handle for the trace.dat file
 * @mult: If not NULL, points to where to save the multiplier
 * @shift: If not NULL, points to where to save the shift.
 * @offset: If not NULL, points to where to save the offset.
 *
 * This only returns a value if the clock is of a raw type.
 * (currently just x86-tsc is supported).
 *
 * Returns 0 on success, or -1 on not supported clock (but may still fill
 * in the values).
*/ int tracecmd_get_tsc2nsec(struct tracecmd_input *handle, int *mult, int *shift, unsigned long long *offset) { if (mult) *mult = handle->tsc_calc.mult; if (shift) *shift = handle->tsc_calc.shift; if (offset) *offset = handle->tsc_calc.offset; return handle->top_buffer.clock && (strcmp(handle->top_buffer.clock, "x86-tsc") == 0 || strcmp(handle->top_buffer.clock, "tsc2nsec") == 0) ? 0 : -1; } /** * tracecmd_get_cpustats - return the saved cpu stats * @handle: input handle for the trace.dat file * * Provides a method to extract the cpu stats saved in @handle. * * Returns a string of the cpu stats that was saved in the trace.dat file. * The string should not be freed, as it points to the internal * structure data. */ const char *tracecmd_get_cpustats(struct tracecmd_input *handle) { return handle->cpustats; } /** * tracecmd_get_uname - return the saved name and kernel information * @handle: input handle for the trace.dat file * * Provides a method to extract the system information saved in @handle. * * Returns a string of the system information that was saved in the * trace.dat file. * The string should not be freed, as it points to the internal * structure data. */ const char *tracecmd_get_uname(struct tracecmd_input *handle) { return handle->uname; } /** * tracecmd_get_version - return the saved version information * @handle: input handle for the trace.dat file * * Provides a method to extract the version string saved in @handle. * * Returns a string of the version that was saved in the trace.dat file. * The string should not be freed, as it points to the internal * structure data. */ const char *tracecmd_get_version(struct tracecmd_input *handle) { return handle->version; } /** * tracecmd_get_cpu_file_size - return the saved cpu file size * @handle: input handle for the trace.dat file * @cpu: cpu index * * Provides a method to extract the cpu file size saved in @handle. * * Returns the cpu file size saved in trace.dat file or (off_t)-1 for * invalid cpu index. 
*/ off_t tracecmd_get_cpu_file_size(struct tracecmd_input *handle, int cpu) { if (cpu < 0 || cpu >= handle->cpus) return (off_t)-1; return handle->cpu_data[cpu].file_size; } /** * tracecmd_get_show_data_func - return the show data func * @handle: input handle for the trace.dat file */ tracecmd_show_data_func tracecmd_get_show_data_func(struct tracecmd_input *handle) { return handle->show_data_func; } /** * tracecmd_set_show_data_func - set the show data func * @handle: input handle for the trace.dat file */ void tracecmd_set_show_data_func(struct tracecmd_input *handle, tracecmd_show_data_func func) { handle->show_data_func = func; } /** * tracecmd_get_traceid - get the trace id of the session * @handle: input handle for the trace.dat file * * Returns the trace id, written in the trace file */ unsigned long long tracecmd_get_traceid(struct tracecmd_input *handle) { return handle->trace_id; } /** * tracecmd_get_first_ts - get the timestamp of the first recorded event * @handle: input handle for the trace.dat file * * Returns the timestamp of the first recorded event */ unsigned long long tracecmd_get_first_ts(struct tracecmd_input *handle) { unsigned long long ts = 0; bool first = true; int i; for (i = 0; i < handle->cpus; i++) { /* Ignore empty buffers */ if (!handle->cpu_data[i].size) continue; if (first || ts > handle->cpu_data[i].first_ts) ts = handle->cpu_data[i].first_ts; first = false; } return ts; } /** * tracecmd_get_guest_cpumap - get the mapping of guest VCPU to host process * @handle: input handle for the trace.dat file * @trace_id: ID of the guest tracing session * @name: return, name of the guest * @vcpu_count: return, number of VPUs * @cpu_pid: return, array with guest VCPU to host process mapping * * Returns @name of the guest, number of VPUs (@vcpu_count) * and array @cpu_pid with size @vcpu_count. Array index is VCPU id, array * content is PID of the host process, running this VCPU. 
* * This information is stored in host trace.dat file */ int tracecmd_get_guest_cpumap(struct tracecmd_input *handle, unsigned long long trace_id, const char **name, int *vcpu_count, const int **cpu_pid) { struct guest_trace_info *guest = handle->guest; while (guest) { if (guest->trace_id == trace_id) break; guest = guest->next; } if (!guest) return -1; if (name) *name = guest->name; if (vcpu_count) *vcpu_count = guest->vcpu_count; if (cpu_pid) *cpu_pid = guest->cpu_pid; return 0; } /** * tracecmd_enable_tsync - enable / disable the timestamps correction * @handle: input handle for the trace.dat file * @enable: enable / disable the timestamps correction * * Enables or disables timestamps correction on file load, using the array of * recorded time offsets. If "enable" is true, but there are no time offsets, * function fails and -1 is returned. * * Returns -1 in case of an error, or 0 otherwise */ int tracecmd_enable_tsync(struct tracecmd_input *handle, bool enable) { if (enable && (!handle->host.ts_offsets || !handle->host.cpu_count)) return -1; handle->host.sync_enable = enable; return 0; } __hidden struct tracecmd_filter *tracecmd_filter_get(struct tracecmd_input *handle) { return handle->filter; } __hidden void tracecmd_filter_set(struct tracecmd_input *handle, struct tracecmd_filter *filter) { /* This can be used to set filter to NULL though. */ if (handle->filter && filter) { tracecmd_warning("Filter exists and setting a new one"); return; } handle->filter = filter; } trace-cmd-v3.3.1/lib/trace-cmd/trace-maps.c000066400000000000000000000104661470231550600203620ustar00rootroot00000000000000#include #include "trace-cmd-local.h" #include "trace-local.h" /* * Structure to hold the mapping between host and guest. * @self - A pointer back to the guest's mapping (for the host copy to use) * @host_handle - The handle for the host for this mapping. * @guest_handle - The handle for the guest for this mapping. * @guest_vcpu - The vCPU # for this mapping. 
* @host_pid - The pid of the task on the host that runs when this vCPU executes. * @private - Private data for applications to use. */ struct tracecmd_cpu_map { struct tracecmd_cpu_map *self; struct tracecmd_input *host_handle; struct tracecmd_input *guest_handle; int guest_vcpu; int host_pid; void *private; }; static int cmp_map(const void *A, const void *B) { const struct tracecmd_cpu_map *a = A; const struct tracecmd_cpu_map *b = B; if (a->host_pid < b->host_pid) return -1; return a->host_pid > b->host_pid; } int tracecmd_map_vcpus(struct tracecmd_input **handles, int nr_handles) { struct tracecmd_input *host_handle = handles[0]; unsigned long long traceid; struct tracecmd_cpu_map *vcpu_maps = NULL; struct tracecmd_cpu_map *gmap; struct tracecmd_cpu_map *map; const int *cpu_pids; const char *name; int nr_vcpu_maps = 0; int vcpu_count; int mappings = 0; int ret; int i, k; /* handles[0] is the host handle, do for each guest handle */ for (i = 1; i < nr_handles; i++) { traceid = tracecmd_get_traceid(handles[i]); /* * Retrieve the host mapping of the guest for this handle. 
* cpu_pids is an array of pids that map 1-1 the host vcpus where * cpu_pids[vCPU_num] = host_task_pid */ ret = tracecmd_get_guest_cpumap(host_handle, traceid, &name, &vcpu_count, &cpu_pids); if (ret) continue; mappings++; gmap = calloc(sizeof(*gmap), vcpu_count); if (!gmap) goto fail; for (k = 0; k < vcpu_count; k++) { gmap[k].host_handle = handles[0]; gmap[k].guest_handle = handles[i]; gmap[k].guest_vcpu = k; gmap[k].host_pid = cpu_pids[k]; gmap[k].self = &gmap[k]; } trace_set_guest_map(handles[i], gmap); trace_set_guest_map_cnt(handles[i], vcpu_count); /* Update the host mapping of all guests to the host */ map = realloc(vcpu_maps, sizeof(*map) * (nr_vcpu_maps + vcpu_count)); if (!map) goto fail; memset(map + nr_vcpu_maps, 0, sizeof(*map) * (vcpu_count - nr_vcpu_maps)); vcpu_maps = map; map += nr_vcpu_maps; nr_vcpu_maps += vcpu_count; for (k = 0; k < vcpu_count; k++) map[k] = gmap[k]; } if (!vcpu_maps) return 0; /* We want to do a binary search via host_pid to find these mappings */ qsort(vcpu_maps, nr_vcpu_maps, sizeof(*map), cmp_map); trace_set_guest_map(handles[0], vcpu_maps); trace_set_guest_map_cnt(handles[0], nr_vcpu_maps); return mappings; fail: free(vcpu_maps); return -1; } __hidden void trace_guest_map_free(struct tracecmd_cpu_map *map) { free(map); } struct tracecmd_cpu_map *tracecmd_map_find_by_host_pid(struct tracecmd_input *handle, int host_pid) { struct tracecmd_cpu_map *map; struct tracecmd_cpu_map key; int nr_maps; map = trace_get_guest_map(handle); if (!map) return NULL; /* The handle could be from the guest, get the host handle */ handle = map->host_handle; /* And again, get the mapping of the host, as it has all the mappings */ map = trace_get_guest_map(handle); if (!map) return NULL; nr_maps = trace_get_guest_map_cnt(handle); key.host_pid = host_pid; map = bsearch(&key, map, nr_maps, sizeof(*map), cmp_map); return map ? 
map->self : NULL; } void tracecmd_map_set_private(struct tracecmd_cpu_map *map, void *priv) { /* Only set the guest private */ map = map->self; map->private = priv; } void *tracecmd_map_get_private(struct tracecmd_cpu_map *map) { /* Return the guest private */ map = map->self; return map->private; } struct tracecmd_input *tracecmd_map_get_guest(struct tracecmd_cpu_map *map) { return map->guest_handle; } int tracecmd_map_get_host_pid(struct tracecmd_cpu_map *map) { return map->host_pid; } struct tracecmd_cpu_map *tracecmd_get_cpu_map(struct tracecmd_input *handle, int cpu) { struct tracecmd_cpu_map *map; int cnt; map = trace_get_guest_map(handle); /* Make sure it's for the guest handle, as this could be a host handle */ map = map->self; cnt = trace_get_guest_map_cnt(map->guest_handle); if (cnt <= cpu) return NULL; return map + cpu; } trace-cmd-v3.3.1/lib/trace-cmd/trace-msg.c000066400000000000000000001050431470231550600202040ustar00rootroot00000000000000// SPDX-License-Identifier: LGPL-2.1 /* * trace-msg.c : define message protocol for communication between clients and * a server * * Copyright (C) 2013 Hitachi, Ltd. * Created by Yoshihiro YUNOMAE * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "trace-write-local.h" #include "trace-cmd-local.h" #include "trace-local.h" #include "trace-msg.h" #include "trace-cmd.h" typedef __u32 u32; typedef __be32 be32; #define dprint(fmt, ...) 
tracecmd_debug(fmt, ##__VA_ARGS__)

/* Two (4k) pages is the max transfer for now */
#define MSG_MAX_LEN			8192

/* Size of the header that starts every message */
#define MSG_HDR_LEN			sizeof(struct tracecmd_msg_header)

/* What is left of a maximum sized message after the header */
#define MSG_MAX_DATA_LEN		(MSG_MAX_LEN - MSG_HDR_LEN)

unsigned int page_size;

/* Command specific part of a MSG_TINIT message */
struct tracecmd_msg_tinit {
	be32 cpus;
	be32 page_size;
	be32 opt_num;
} __packed;

/* Command specific part of a MSG_RINIT message */
struct tracecmd_msg_rinit {
	be32 cpus;
} __packed;

#define TRACE_REQ_PARAM_SIZE  (2 * sizeof(int))
enum trace_req_params {
	TRACE_REQUEST_ARGS,
	TRACE_REQUEST_TSYNC_PROTOS,
};

struct tracecmd_msg_trace_req_param {
	int id;
	int length;
	char *value;
};

/* Command specific part of a MSG_TRACE_REQ message */
struct tracecmd_msg_trace_req {
	be32 flags;
	be32 argc;
	u64 trace_id;
} __packed;

/* Command specific part of a MSG_TRACE_PROXY message */
struct tracecmd_msg_trace_proxy {
	struct tracecmd_msg_trace_req req;
	be32 cpus;
	be32 siblings;
} __packed;

/* Command specific part of a MSG_TRACE_RESP message */
struct tracecmd_msg_trace_resp {
	be32 flags;
	be32 cpus;
	be32 page_size;
	u64 trace_id;
	char tsync_proto_name[TRACECMD_TSYNC_PNAME_LENGTH];
	be32 tsync_port;
} __packed;

/* Command specific part of a MSG_TIME_SYNC message */
struct tracecmd_msg_tsync {
	char sync_protocol_name[TRACECMD_TSYNC_PNAME_LENGTH];
	be32 sync_msg_id;
} __packed;

/*
 * Header found at the start of every message. The code in this file
 * converts the fields with htonl() when filling them in and with ntohl()
 * when reading them.
 */
struct tracecmd_msg_header {
	be32	size;
	be32	cmd;
	be32	cmd_size;
} __packed;

/*
 * One C(name, value, size) entry per command. The list is expanded three
 * times below with different definitions of C() to generate the command
 * enum, the table of command specific sizes and the table of command names.
 */
#define MSG_MAP								\
	C(CLOSE,	0,	0),					\
	C(TINIT,	1,	sizeof(struct tracecmd_msg_tinit)),	\
	C(RINIT,	2,	sizeof(struct tracecmd_msg_rinit)),	\
	C(SEND_DATA,	3,	0),					\
	C(FIN_DATA,	4,	0),					\
	C(NOT_SUPP,	5,	0),					\
	C(TRACE_REQ,	6,	sizeof(struct tracecmd_msg_trace_req)),	\
	C(TRACE_RESP,	7,	sizeof(struct tracecmd_msg_trace_resp)),\
	C(CLOSE_RESP,	8,	0),					\
	C(TIME_SYNC,	9,	sizeof(struct tracecmd_msg_tsync)),	\
	C(TRACE_PROXY,	10,	sizeof(struct tracecmd_msg_trace_proxy)), \
	C(CONT,		11,	0),

/* First expansion: the MSG_<name> enum values */
#undef C
#define C(a,b,c)	MSG_##a = b

enum tracecmd_msg_cmd {
	MSG_MAP
	MSG_NR_COMMANDS
};

/* Second expansion: size of the command specific part, indexed by command */
#undef C
#define C(a,b,c)	c

static be32 msg_cmd_sizes[] = { MSG_MAP };

/* Third expansion: the command names as strings, indexed by command */
#undef C
#define C(a,b,c)	#a

static const char *msg_names[] = { MSG_MAP };

/* Return the name of command @cmd, or "Unknown" if it is out of range */
static const char *cmd_to_name(int cmd)
{
	if (cmd < 0 || cmd >= MSG_NR_COMMANDS)
		return "Unknown";
	return msg_names[cmd];
}

/*
 * A full message: the header, the command specific part (which member of
 * the union is used depends on the command) and a pointer to an optional
 * data payload.
 */
struct tracecmd_msg {
	struct tracecmd_msg_header		hdr;
union { struct tracecmd_msg_tinit tinit; struct tracecmd_msg_rinit rinit; struct tracecmd_msg_trace_req trace_req; struct tracecmd_msg_trace_proxy trace_proxy; struct tracecmd_msg_trace_resp trace_resp; struct tracecmd_msg_tsync tsync; }; char *buf; } __packed; static inline int msg_buf_len(struct tracecmd_msg *msg) { return ntohl(msg->hdr.size) - MSG_HDR_LEN - ntohl(msg->hdr.cmd_size); } static int __msg_write(int fd, struct tracecmd_msg *msg, bool network) { int msg_size, data_size; int ret; int cmd; if (network) { cmd = ntohl(msg->hdr.cmd); if (cmd < 0 || cmd >= MSG_NR_COMMANDS) return -EINVAL; dprint("msg send: %d (%s) [%d]\n", cmd, cmd_to_name(cmd), ntohl(msg->hdr.size)); } msg_size = MSG_HDR_LEN + ntohl(msg->hdr.cmd_size); data_size = ntohl(msg->hdr.size) - msg_size; if (data_size < 0) return -EINVAL; if (network) { ret = __do_write_check(fd, msg, msg_size); if (ret < 0) return ret; } if (!data_size) return 0; return __do_write_check(fd, msg->buf, data_size); } __hidden off_t msg_lseek(struct tracecmd_msg_handle *msg_handle, off_t offset, int whence) { off_t cache_offset = msg_handle->cache_start_offset; off_t ret; /* * lseek works only if the handle is in cache mode, * cannot seek on a network socket */ if (!msg_handle->cache || msg_handle->cfd < 0) return (off_t)-1; if (whence == SEEK_SET) { if (offset < cache_offset) return (off_t)-1; offset -= cache_offset; } ret = lseek(msg_handle->cfd, offset, whence); if (ret == (off_t)-1) return ret; return ret + cache_offset; } static int msg_write(struct tracecmd_msg_handle *msg_handle, struct tracecmd_msg *msg) { if (msg_handle->cache && msg_handle->cfd >= 0) return __msg_write(msg_handle->cfd, msg, false); return __msg_write(msg_handle->fd, msg, true); } enum msg_trace_flags { MSG_TRACE_USE_FIFOS = 1 << 0, }; static int make_tinit(struct tracecmd_msg_handle *msg_handle, struct tracecmd_msg *msg) { int cpu_count = msg_handle->cpu_count; int opt_num = 0; int data_size = 0; if (msg_handle->flags & 
(TRACECMD_MSG_FL_USE_TCP | TRACECMD_MSG_FL_USE_VSOCK)) { msg->buf = msg_handle->flags & TRACECMD_MSG_FL_USE_TCP ? strdup("tcp") : strdup("vsock"); if (!msg->buf) return -1; opt_num++; data_size += strlen(msg->buf) + 1; } msg->tinit.cpus = htonl(cpu_count); msg->tinit.page_size = htonl(page_size); msg->tinit.opt_num = htonl(opt_num); msg->hdr.size = htonl(ntohl(msg->hdr.size) + data_size); return 0; } /* test a to u */ static int tatou(const char *s, unsigned int *res) { long r; r = atol(s); if (r >= 0 && r <= UINT_MAX) { *res = (unsigned int)r; return 0; } return -1; } static int write_uints(char *buf, size_t buf_len, unsigned int *arr, int arr_len) { int i, ret, tot = 0; for (i = 0; i < arr_len; i++) { ret = snprintf(buf, buf_len, "%u", arr[i]); if (ret < 0) return ret; /* Count the '\0' byte */ ret++; tot += ret; if (buf) buf += ret; if (buf_len >= ret) buf_len -= ret; else buf_len = 0; } return tot; } static int make_rinit(struct tracecmd_msg *msg, int cpus, unsigned int *ports) { int data_size; data_size = write_uints(NULL, 0, ports, cpus); msg->buf = malloc(data_size); if (!msg->buf) return -ENOMEM; write_uints(msg->buf, data_size, ports, cpus); msg->rinit.cpus = htonl(cpus); msg->hdr.size = htonl(ntohl(msg->hdr.size) + data_size); return 0; } static void tracecmd_msg_init(u32 cmd, struct tracecmd_msg *msg) { memset(msg, 0, sizeof(*msg)); msg->hdr.size = htonl(MSG_HDR_LEN + msg_cmd_sizes[cmd]); msg->hdr.cmd = htonl(cmd); msg->hdr.cmd_size = htonl(msg_cmd_sizes[cmd]); } static void msg_free(struct tracecmd_msg *msg) { free(msg->buf); memset(msg, 0, sizeof(*msg)); } static int tracecmd_msg_send(struct tracecmd_msg_handle *msg_handle, struct tracecmd_msg *msg) { int ret = 0; ret = msg_write(msg_handle, msg); if (ret < 0) ret = -ECOMM; msg_free(msg); return ret; } static int msg_send_nofree(struct tracecmd_msg_handle *msg_handle, struct tracecmd_msg *msg) { int ret = 0; ret = msg_write(msg_handle, msg); if (ret < 0) ret = -ECOMM; return ret; } static int 
msg_read(int fd, void *buf, u32 size, int *n) { ssize_t r; while (size) { r = read(fd, buf + *n, size); if (r < 0) { if (errno == EINTR) continue; return -errno; } else if (!r) return -ENOTCONN; size -= r; *n += r; } return 0; } static char scratch_buf[MSG_MAX_LEN]; static int msg_read_extra(int fd, struct tracecmd_msg *msg, int *n, int size) { int cmd, cmd_size, rsize; int ret; cmd = ntohl(msg->hdr.cmd); if (cmd < 0 || cmd >= MSG_NR_COMMANDS) return -EINVAL; cmd_size = ntohl(msg->hdr.cmd_size); if (cmd_size < 0) return -EINVAL; if (cmd_size > 0) { rsize = cmd_size; if (rsize > msg_cmd_sizes[cmd]) rsize = msg_cmd_sizes[cmd]; ret = msg_read(fd, msg, rsize, n); if (ret < 0) return ret; ret = msg_read(fd, scratch_buf, cmd_size - rsize, n); if (ret < 0) return ret; } if (size > *n) { size -= *n; msg->buf = malloc(size); if (!msg->buf) return -ENOMEM; *n = 0; return msg_read(fd, msg->buf, size, n); } return 0; } /* * Read header information of msg first, then read all data */ static int tracecmd_msg_recv(int fd, struct tracecmd_msg *msg) { u32 size = 0; int n = 0; int ret; ret = msg_read(fd, msg, MSG_HDR_LEN, &n); if (ret < 0) return ret; dprint("msg received: %d (%s) [%d]\n", ntohl(msg->hdr.cmd), cmd_to_name(ntohl(msg->hdr.cmd)), ntohl(msg->hdr.size)); size = ntohl(msg->hdr.size); if (size > MSG_MAX_LEN) /* too big */ goto error; else if (size < MSG_HDR_LEN) /* too small */ goto error; else if (size > MSG_HDR_LEN) return msg_read_extra(fd, msg, &n, size); return 0; error: tracecmd_plog("Receive an invalid message(size=%d)\n", size); return -ENOMSG; } #define MSG_WAIT_MSEC 5000 static int msg_wait_to = MSG_WAIT_MSEC; bool tracecmd_msg_done(struct tracecmd_msg_handle *msg_handle) { return (volatile int)msg_handle->done; } void tracecmd_msg_set_done(struct tracecmd_msg_handle *msg_handle) { msg_handle->done = true; } static void error_operation(struct tracecmd_msg *msg) { tracecmd_warning("Message: cmd=%d size=%d", ntohl(msg->hdr.cmd), ntohl(msg->hdr.size)); } /* * A 
return value of 0 indicates time-out */ static int tracecmd_msg_recv_wait(int fd, struct tracecmd_msg *msg) { struct pollfd pfd; int ret; pfd.fd = fd; pfd.events = POLLIN; ret = poll(&pfd, 1, tracecmd_get_notimeout() ? -1 : msg_wait_to); if (ret < 0) return -errno; else if (ret == 0) return -ETIMEDOUT; return tracecmd_msg_recv(fd, msg); } static int tracecmd_msg_wait_for_msg(int fd, struct tracecmd_msg *msg) { u32 cmd; int ret; ret = tracecmd_msg_recv_wait(fd, msg); if (ret < 0) { if (ret == -ETIMEDOUT) tracecmd_warning("Connection timed out"); return ret; } cmd = ntohl(msg->hdr.cmd); if (cmd == MSG_CLOSE) return -ECONNABORTED; return 0; } static int tracecmd_msg_send_notsupp(struct tracecmd_msg_handle *msg_handle) { struct tracecmd_msg msg; tracecmd_msg_init(MSG_NOT_SUPP, &msg); return tracecmd_msg_send(msg_handle, &msg); } static int handle_unexpected_msg(struct tracecmd_msg_handle *msg_handle, struct tracecmd_msg *msg) { /* Don't send MSG_NOT_SUPP back if we just received one */ if (ntohl(msg->hdr.cmd) == MSG_NOT_SUPP) return 0; return tracecmd_msg_send_notsupp(msg_handle); } int tracecmd_msg_send_init_data(struct tracecmd_msg_handle *msg_handle, unsigned int **client_ports) { struct tracecmd_msg msg; unsigned int *ports; int i, cpus, ret; char *p, *buf_end; ssize_t buf_len; *client_ports = NULL; tracecmd_msg_init(MSG_TINIT, &msg); ret = make_tinit(msg_handle, &msg); if (ret < 0) goto out; ret = tracecmd_msg_send(msg_handle, &msg); if (ret < 0) goto out; msg_free(&msg); ret = tracecmd_msg_wait_for_msg(msg_handle->fd, &msg); if (ret < 0) goto out; if (ntohl(msg.hdr.cmd) != MSG_RINIT) { ret = -EOPNOTSUPP; goto error; } buf_len = msg_buf_len(&msg); if (buf_len <= 0) { ret = -EINVAL; goto error; } if (msg.buf[buf_len-1] != '\0') { ret = -EINVAL; goto error; } cpus = ntohl(msg.rinit.cpus); ports = malloc(sizeof(*ports) * cpus); if (!ports) { ret = -ENOMEM; goto out; } buf_end = msg.buf + buf_len; for (i = 0, p = msg.buf; i < cpus; i++, p++) { if (p >= buf_end || 
tatou(p, &ports[i])) { free(ports); ret = -EINVAL; goto error; } p = strchr(p, '\0'); } *client_ports = ports; msg_free(&msg); return 0; error: error_operation(&msg); if (ret == -EOPNOTSUPP) handle_unexpected_msg(msg_handle, &msg); out: msg_free(&msg); return ret; } static bool process_option(struct tracecmd_msg_handle *msg_handle, const char *opt) { if (strcmp(opt, "tcp") == 0) { msg_handle->flags |= TRACECMD_MSG_FL_USE_TCP; return true; } if (strcmp(opt, "vsock") == 0) { msg_handle->flags |= TRACECMD_MSG_FL_USE_VSOCK; return true; } return false; } struct tracecmd_msg_handle * tracecmd_msg_handle_alloc(int fd, unsigned long flags) { struct tracecmd_msg_handle *handle; handle = calloc(1, sizeof(struct tracecmd_msg_handle)); if (!handle) return NULL; handle->fd = fd; handle->flags = flags; handle->cfd = -1; handle->cache = false; return handle; } int tracecmd_msg_handle_cache(struct tracecmd_msg_handle *msg_handle) { if (msg_handle->cfd < 0) { #ifdef HAVE_MEMFD_CREATE msg_handle->cfd = memfd_create("trace_msg_cache", 0); if (msg_handle->cfd < 0) return -1; #else strcpy(msg_handle->cfile, MSG_CACHE_FILE); msg_handle->cfd = mkstemp(msg_handle->cfile); if (msg_handle->cfd < 0) return -1; unlink(msg_handle->cfile); #endif } msg_handle->cache = true; return 0; } static int flush_cache(struct tracecmd_msg_handle *msg_handle) { char buf[MSG_MAX_DATA_LEN]; int fd = msg_handle->cfd; int ret; if (!msg_handle->cache || fd < 0) return 0; msg_handle->cache = false; if (lseek(fd, 0, SEEK_SET) == (off_t)-1) return -1; do { ret = read(fd, buf, MSG_MAX_DATA_LEN); if (ret <= 0) break; ret = tracecmd_msg_data_send(msg_handle, buf, ret); if (ret < 0) break; } while (ret >= 0); msg_handle->cache_start_offset = lseek(fd, 0, SEEK_CUR); if (msg_handle->cache_start_offset == (off_t)-1) return -1; close(fd); msg_handle->cfd = -1; return ret; } void tracecmd_msg_handle_close(struct tracecmd_msg_handle *msg_handle) { if (msg_handle->fd >= 0) close(msg_handle->fd); if (msg_handle->cfd >= 0) 
close(msg_handle->cfd);
	free(msg_handle);
}

#define MAX_OPTION_SIZE 4096

/*
 * Wait for the MSG_TINIT message of the other side and take the CPU count
 * and the options found in it over into @msg_handle.
 *
 * Returns the page size sent in the message (always bigger than zero) on
 * success, or a negative error value.
 */
int tracecmd_msg_initial_setting(struct tracecmd_msg_handle *msg_handle)
{
	struct tracecmd_msg msg;
	char *p, *buf_end;
	ssize_t buf_len;
	int pagesize;
	int options, i;
	int cpus;
	int ret;

	memset(&msg, 0, sizeof(msg));
	ret = tracecmd_msg_recv_wait(msg_handle->fd, &msg);
	if (ret < 0) {
		if (ret == -ETIMEDOUT)
			tracecmd_warning("Connection timed out");
		return ret;
	}

	if (ntohl(msg.hdr.cmd) != MSG_TINIT) {
		ret = -EOPNOTSUPP;
		goto error;
	}

	cpus = ntohl(msg.tinit.cpus);
	tracecmd_plog("cpus=%d\n", cpus);
	if (cpus < 0) {
		ret = -EINVAL;
		goto error;
	}

	msg_handle->cpu_count = cpus;

	pagesize = ntohl(msg.tinit.page_size);
	tracecmd_plog("pagesize=%d\n", pagesize);
	if (pagesize <= 0) {
		ret = -EINVAL;
		goto error;
	}

	buf_len = msg_buf_len(&msg);
	if (buf_len < 0) {
		ret = -EINVAL;
		goto error;
	}

	/* A message without payload carries no options */
	if (buf_len == 0)
		goto no_options;

	/* The option strings are walked with strchr(), they must be terminated */
	if (msg.buf[buf_len-1] != '\0') {
		ret = -EINVAL;
		goto error;
	}

	buf_end = msg.buf + buf_len;
	options = ntohl(msg.tinit.opt_num);
	for (i = 0, p = msg.buf; i < options; i++, p++) {
		/* More options announced than the payload holds */
		if (p >= buf_end) {
			ret = -EINVAL;
			goto error;
		}

		/* do we understand this option? 
*/ if (!process_option(msg_handle, p)) tracecmd_plog("Cannot understand option '%s'\n", p); p = strchr(p, '\0'); } no_options: msg_free(&msg); return pagesize; error: error_operation(&msg); if (ret == -EOPNOTSUPP) handle_unexpected_msg(msg_handle, &msg); msg_free(&msg); return ret; } int tracecmd_msg_send_port_array(struct tracecmd_msg_handle *msg_handle, unsigned int *ports) { struct tracecmd_msg msg; int ret; tracecmd_msg_init(MSG_RINIT, &msg); ret = make_rinit(&msg, msg_handle->cpu_count, ports); if (ret < 0) return ret; ret = tracecmd_msg_send(msg_handle, &msg); if (ret < 0) return ret; return 0; } int tracecmd_msg_send_close_msg(struct tracecmd_msg_handle *msg_handle) { struct tracecmd_msg msg; tracecmd_msg_init(MSG_CLOSE, &msg); return tracecmd_msg_send(msg_handle, &msg); } int tracecmd_msg_send_close_resp_msg(struct tracecmd_msg_handle *msg_handle) { struct tracecmd_msg msg; tracecmd_msg_init(MSG_CLOSE_RESP, &msg); return tracecmd_msg_send(msg_handle, &msg); } int tracecmd_msg_cont(struct tracecmd_msg_handle *msg_handle) { struct tracecmd_msg msg; tracecmd_msg_init(MSG_CONT, &msg); return tracecmd_msg_send(msg_handle, &msg); } int tracecmd_msg_data_send(struct tracecmd_msg_handle *msg_handle, const char *buf, int size) { struct tracecmd_msg msg; int n; int ret; int count = 0; /* Don't bother doing anything if there's nothing to do */ if (!size) return 0; tracecmd_msg_init(MSG_SEND_DATA, &msg); msg.buf = malloc(MSG_MAX_DATA_LEN); if (!msg.buf) return -ENOMEM; msg.hdr.size = htonl(MSG_MAX_LEN); n = size; while (n) { if (n > MSG_MAX_DATA_LEN) { memcpy(msg.buf, buf + count, MSG_MAX_DATA_LEN); n -= MSG_MAX_DATA_LEN; count += MSG_MAX_DATA_LEN; } else { msg.hdr.size = htonl(MSG_HDR_LEN + n); memcpy(msg.buf, buf + count, n); n = 0; } ret = msg_write(msg_handle, &msg); if (ret < 0) break; } msg_free(&msg); return ret; } /** * tracecmd_msg_send_options - Send options over the network * @msg_handle: message handle, holding the communication context * @handle: The 
output file that has the options to send
 *
 * Send options over the network. This is used when the output handle
 * has more options to send over the network after the trace. Some
 * options are sent before, and some sent afterward. Since the receiving
 * side needs to know the location to update the indexes, it will
 * handle the section header. This just sends out the raw content to
 * the receiver (requires that both sides have the same endianness, as
 * no conversion is made of the content of the options).
 *
 * Returns 0 on success, -1 if there are no options to send or they are
 * too big to be sent, or a negative value if sending failed.
 */
int tracecmd_msg_send_options(struct tracecmd_msg_handle *msg_handle,
			      struct tracecmd_output *handle)
{
	struct tracecmd_msg msg;
	size_t len;
	void *buf;
	int ret;

	buf = trace_get_options(handle, &len);
	if (!buf)
		return -1;

	/* tracecmd_msg_data_send() takes an int, do not let the size wrap */
	if (len > INT_MAX) {
		free(buf);
		return -1;
	}

	ret = tracecmd_msg_data_send(msg_handle, buf, len);
	free(buf);
	if (ret < 0)
		return ret;

	/* Tell the receiver that this was all of it */
	tracecmd_msg_init(MSG_FIN_DATA, &msg);
	return tracecmd_msg_send(msg_handle, &msg);
}

/**
 * tracecmd_msg_flush_data - Send the current cache data over the network
 * @msg_handle: message handle, holding the communication context
 *
 * Send the content in the cache file over the network, reset the file
 * and start the cache up again (with nothing in it).
*/ int tracecmd_msg_flush_data(struct tracecmd_msg_handle *msg_handle) { struct tracecmd_msg msg; int ret; flush_cache(msg_handle); tracecmd_msg_init(MSG_FIN_DATA, &msg); ret = tracecmd_msg_send(msg_handle, &msg); if (ret < 0) return ret; return tracecmd_msg_handle_cache(msg_handle); } int tracecmd_msg_finish_sending_data(struct tracecmd_msg_handle *msg_handle) { struct tracecmd_msg msg; int ret; flush_cache(msg_handle); tracecmd_msg_init(MSG_FIN_DATA, &msg); ret = tracecmd_msg_send(msg_handle, &msg); if (ret < 0) return ret; return 0; } static int read_msg_data(struct tracecmd_msg_handle *msg_handle, struct tracecmd_msg *msg) { int cmd; int ret; ret = tracecmd_msg_recv_wait(msg_handle->fd, msg); if (ret < 0) { tracecmd_warning("reading client %d (%s)", ret, strerror(ret)); return ret; } cmd = ntohl(msg->hdr.cmd); if (cmd == MSG_FIN_DATA) { /* Finish receiving data */ return 0; } else if (cmd != MSG_SEND_DATA) { ret = handle_unexpected_msg(msg_handle, msg); if (ret < 0) return -1; return 0; } return msg_buf_len(msg); } /** * tracecmd_msg_read_options - Receive options from over the network * @msg_handle: message handle, holding the communication context * @handle: The output file to write the options to. * * Receive the options sent by tracecmd_msg_send_options(). * See that function's documentation for mor details. * * Returns 0 on success and -1 on error. 
/**
 * tracecmd_msg_read_options - Receive options from over the network
 * @msg_handle: message handle, holding the communication context
 * @handle: The output file to write the options to.
 *
 * Receive the options sent by tracecmd_msg_send_options(): the data of
 * all MSG_SEND_DATA messages is accumulated until the transfer ends and
 * is then handed to trace_append_options().
 *
 * Returns the result of trace_append_options() on success, and -1 if
 * reading a message or growing the buffer fails. Nothing is appended to
 * @handle in the failure case.
 */
int tracecmd_msg_read_options(struct tracecmd_msg_handle *msg_handle,
			      struct tracecmd_output *handle)
{
	struct tracecmd_msg msg;
	size_t len = 0;
	char *buf = NULL;
	char *tmp;
	int ret;
	int n;

	memset(&msg, 0, sizeof(msg));
	while (!tracecmd_msg_done(msg_handle)) {
		n = read_msg_data(msg_handle, &msg);
		/* A read failure must not be mistaken for the end of the data */
		if (n < 0)
			goto error;
		if (!n)
			break;

		tmp = realloc(buf, n + len);
		if (!tmp)
			goto error;
		buf = tmp;
		memcpy(buf + len, msg.buf, n);
		len += n;
		msg_free(&msg);
	}
	msg_free(&msg);

	ret = trace_append_options(handle, buf, len);
	free(buf);

	return ret;
 error:
	msg_free(&msg);
	free(buf);
	return -1;
}
{ return tracecmd_msg_wait_for_cmd(msg_handle, MSG_CLOSE); } int tracecmd_msg_wait_close_resp(struct tracecmd_msg_handle *msg_handle) { return tracecmd_msg_wait_for_cmd(msg_handle, MSG_CLOSE_RESP); } static int make_trace_req_protos(char **buf, int *size, struct tracecmd_tsync_protos *protos) { int protos_size = 1; size_t buf_size; char **names; char *nbuf; char *p; names = protos->names; while (*names) { protos_size += strlen(*names) + 1; names++; } buf_size = TRACE_REQ_PARAM_SIZE + protos_size; nbuf = realloc(*buf, *size + buf_size); if (!nbuf) return -1; p = nbuf + *size; memset(p, 0, buf_size); *(unsigned int *)p = htonl(TRACE_REQUEST_TSYNC_PROTOS); p += sizeof(int); *(unsigned int *)p = htonl(protos_size); p += sizeof(int); names = protos->names; while (*names) { strcpy(p, *names); p += strlen(*names) + 1; names++; } p = NULL; *size += buf_size; *buf = nbuf; return 0; } static int make_trace_req_args(char **buf, int *size, int argc, char **argv) { size_t args_size; size_t buf_size; char *nbuf; char *p; int i; args_size = sizeof(int); for (i = 0; i < argc; i++) args_size += strlen(argv[i]) + 1; buf_size = TRACE_REQ_PARAM_SIZE + args_size; nbuf = realloc(*buf, *size + buf_size); if (!nbuf) return -1; p = nbuf + *size; memset(p, 0, buf_size); *(unsigned int *)p = htonl(TRACE_REQUEST_ARGS); p += sizeof(int); *(unsigned int *)p = htonl(args_size); p += sizeof(int); *(unsigned int *)p = htonl(argc); p += sizeof(int); for (i = 0; i < argc; i++) p = stpcpy(p, argv[i]) + 1; *size += buf_size; *buf = nbuf; return 0; } static int make_trace_req(struct tracecmd_msg *msg, int argc, char **argv, bool use_fifos, unsigned long long trace_id, struct tracecmd_tsync_protos *protos) { int size = 0; char *buf = NULL; msg->trace_req.flags = 0; if (use_fifos) msg->trace_req.flags |= MSG_TRACE_USE_FIFOS; msg->trace_req.flags = htonl(msg->trace_req.flags); msg->trace_req.trace_id = htonll(trace_id); if (argc && argv) make_trace_req_args(&buf, &size, argc, argv); if (protos && 
/*
 * Parse a TRACE_REQUEST_ARGS parameter.
 *
 * @buf:    parameter payload: a 32 bit argument count in network order,
 *          followed by that many '\0' terminated strings
 * @length: size of @buf in bytes
 * @argc:   set to the number of arguments on success
 * @argv:   set to a newly allocated, NULL terminated array of arguments
 *
 * The argument strings are copied out of @buf into a single allocation
 * that argv[0] points to, so the result stays valid after the caller
 * releases the received message and can be freed with:
 *     free(argv[0]);
 *     free(argv);
 *
 * Returns 0 on success, -EINVAL if the payload is malformed and -ENOMEM
 * if memory can not be allocated. @argc and @argv are only written on
 * success.
 */
static int get_trace_req_args(char *buf, int length, int *argc, char ***argv)
{
	unsigned int nr_args;
	unsigned int i;
	char **args = NULL;
	char *strings = NULL;
	char *end;
	char *p;
	int ret;

	/* The cast keeps a negative length from being compared as unsigned */
	if (!buf || length <= (int)sizeof(int) || buf[length - 1] != '\0')
		return -EINVAL;

	/* buf is not guaranteed to be aligned for a direct 32 bit load */
	memcpy(&nr_args, buf, sizeof(nr_args));
	nr_args = ntohl(nr_args);
	buf += sizeof(int);
	length -= sizeof(int);

	/* Every argument takes at least its terminating '\0' */
	if (nr_args > (unsigned int)length)
		return -EINVAL;

	args = calloc((size_t)nr_args + 1, sizeof(*args));
	strings = malloc(length);
	if (!args || !strings) {
		ret = -ENOMEM;
		goto out;
	}
	memcpy(strings, buf, length);

	end = strings + length;
	for (i = 0, p = strings; i < nr_args; i++) {
		if (p >= end) {
			ret = -EINVAL;
			goto out;
		}
		args[i] = p;
		/* The last byte is '\0', the search can not leave the copy */
		p = strchr(p, '\0') + 1;
	}

	/* With no arguments argv[0] is NULL, nothing references the copy */
	if (!nr_args)
		free(strings);

	*argc = nr_args;
	*argv = args;
	return 0;

out:
	free(strings);
	free(args);
	return ret;
}
/*
 * Receive a trace request, including the proxy specific fields.
 *
 * All arguments are passed unchanged to msg_recv_trace_req_proxy(), which
 * accepts both MSG_TRACE_REQ and MSG_TRACE_PROXY messages. @cpus and
 * @siblings are only written when a MSG_TRACE_PROXY message is received.
 *
 * Returns whatever msg_recv_trace_req_proxy() returns: 0 on success or a
 * negative value on failure.
 *
 * NOTE: On success, the returned `argv` should be freed with:
 *     free(argv[0]);
 *     free(argv);
 * and `tsync_protos` with free(tsync_protos);
 *
 * NOTE(review): get_trace_req_protos() allocates the `names` array and
 * each name separately, so free(tsync_protos) alone looks like it leaks
 * them - confirm against the callers.
 */
int tracecmd_msg_recv_trace_proxy(struct tracecmd_msg_handle *msg_handle,
				  int *argc, char ***argv, bool *use_fifos,
				  unsigned long long *trace_id,
				  struct tracecmd_tsync_protos **protos,
				  unsigned int *cpus, unsigned int *siblings)
{
	return msg_recv_trace_req_proxy(msg_handle, argc, argv, use_fifos,
					trace_id, protos, cpus, siblings);
}
*/ int tracecmd_msg_send_time_sync(struct tracecmd_msg_handle *msg_handle, char *sync_protocol, unsigned int sync_msg_id, unsigned int payload_size, char *payload) { struct tracecmd_msg msg; tracecmd_msg_init(MSG_TIME_SYNC, &msg); strncpy(msg.tsync.sync_protocol_name, sync_protocol, TRACECMD_TSYNC_PNAME_LENGTH); msg.tsync.sync_msg_id = htonl(sync_msg_id); msg.hdr.size = htonl(ntohl(msg.hdr.size) + payload_size); msg.buf = payload; return msg_send_nofree(msg_handle, &msg); } /** * tracecmd_msg_recv_time_sync - Receive a time sync packet * @msg_handle: message handle, holding the communication context * @sync_protocol: return the name of the packet's time synch protocol. * It must point to a prealocated buffer with size * TRACECMD_TSYNC_PNAME_LENGTH * @sync_msg_id: return the id of the packet's time synch message * @payload_size: size of the packet's payload, can be: * NULL - the payload is not interested and should be ignored * pointer to int, with value 0 - update with the size of the payload * allocate memory and cpy the payload * into it * pointer to int, with value greater than 0 - expected size of the * payload, preallocated * memory is passed to the API * with that size *@payload: pointer to the packet payload, can be: * NULL - the payload is not interested and should be ignored * pointer to char *, with value NULL - a new memory is allocated and returned * here, containing the packet's payload * the @payload_size is updated with the * size of the allocated memory. It must be * freed by free() * pointer to char *, with no-NULL value - A prealocated array is passed, with size * @payload_size. If payload's size is equal * or less, it will be copied here. * * Returns 0 if packet is received successfully, or negative error otherwise. 
*/ int tracecmd_msg_recv_time_sync(struct tracecmd_msg_handle *msg_handle, char *sync_protocol, unsigned int *sync_msg_id, unsigned int *payload_size, char **payload) { struct tracecmd_msg msg; int ret = -1; int buf_size; memset(&msg, 0, sizeof(msg)); ret = tracecmd_msg_recv(msg_handle->fd, &msg); if (ret < 0) goto out; if (ntohl(msg.hdr.cmd) != MSG_TIME_SYNC) { ret = -EOPNOTSUPP; goto out; } if (sync_protocol) strncpy(sync_protocol, msg.tsync.sync_protocol_name, TRACECMD_TSYNC_PNAME_LENGTH); if (sync_msg_id) *sync_msg_id = ntohl(msg.tsync.sync_msg_id); buf_size = msg_buf_len(&msg); if (buf_size < 0) { ret = -EINVAL; goto out; } if (buf_size && payload && payload_size) { if (*payload_size) { if (*payload_size < buf_size || *payload == NULL) { ret = -ENOMEM; goto out; } memcpy(*payload, msg.buf, buf_size); goto out; } *payload = malloc(buf_size); if (*payload == NULL) { ret = -ENOMEM; goto out; } *payload_size = buf_size; memcpy(*payload, msg.buf, buf_size); } out: msg_free(&msg); return ret; } static int make_trace_resp(struct tracecmd_msg *msg, int page_size, int nr_cpus, unsigned int *ports, bool use_fifos, unsigned long long trace_id, const char *tsync_proto, unsigned int tsync_port) { int data_size; if (!tsync_proto) tsync_proto = ""; data_size = write_uints(NULL, 0, ports, nr_cpus); msg->buf = malloc(data_size); if (!msg->buf) return -ENOMEM; write_uints(msg->buf, data_size, ports, nr_cpus); msg->hdr.size = htonl(ntohl(msg->hdr.size) + data_size); msg->trace_resp.flags = use_fifos ? 
MSG_TRACE_USE_FIFOS : 0; msg->trace_resp.flags = htonl(msg->trace_resp.flags); strncpy(msg->trace_resp.tsync_proto_name, tsync_proto, TRACECMD_TSYNC_PNAME_LENGTH); msg->trace_resp.tsync_port = htonl(tsync_port); msg->trace_resp.cpus = htonl(nr_cpus); msg->trace_resp.page_size = htonl(page_size); msg->trace_resp.trace_id = htonll(trace_id); return 0; } int tracecmd_msg_send_trace_resp(struct tracecmd_msg_handle *msg_handle, int nr_cpus, int page_size, unsigned int *ports, bool use_fifos, unsigned long long trace_id, const char *tsync_proto, unsigned int tsync_port) { struct tracecmd_msg msg; int ret; tracecmd_msg_init(MSG_TRACE_RESP, &msg); ret = make_trace_resp(&msg, page_size, nr_cpus, ports, use_fifos, trace_id, tsync_proto, tsync_port); if (ret < 0) return ret; return tracecmd_msg_send(msg_handle, &msg); } int tracecmd_msg_recv_trace_resp(struct tracecmd_msg_handle *msg_handle, int *nr_cpus, int *page_size, unsigned int **ports, bool *use_fifos, unsigned long long *trace_id, char **tsync_proto, unsigned int *tsync_port) { struct tracecmd_msg msg; char *p, *buf_end; ssize_t buf_len; int i, ret; ret = tracecmd_msg_recv(msg_handle->fd, &msg); if (ret < 0) return ret; if (ntohl(msg.hdr.cmd) != MSG_TRACE_RESP) { ret = -ENOTSUP; goto out; } buf_len = msg_buf_len(&msg); if (buf_len <= 0) { ret = -EINVAL; goto out; } *use_fifos = ntohl(msg.trace_resp.flags) & MSG_TRACE_USE_FIFOS; *nr_cpus = ntohl(msg.trace_resp.cpus); *page_size = ntohl(msg.trace_resp.page_size); *trace_id = ntohll(msg.trace_resp.trace_id); *tsync_proto = strdup(msg.trace_resp.tsync_proto_name); *tsync_port = ntohl(msg.trace_resp.tsync_port); *ports = calloc(*nr_cpus, sizeof(**ports)); if (!*ports) { ret = -ENOMEM; goto out; } buf_end = msg.buf + buf_len; for (i = 0, p = msg.buf; i < *nr_cpus; i++, p++) { if (p >= buf_end || tatou(p, &(*ports)[i])) { free(*ports); ret = -EINVAL; goto out; } p = strchr(p, '\0'); } msg_free(&msg); return 0; out: error_operation(&msg); if (ret == -EOPNOTSUPP) 
handle_unexpected_msg(msg_handle, &msg); msg_free(&msg); return ret; } trace-cmd-v3.3.1/lib/trace-cmd/trace-output.c000066400000000000000000002174351470231550600207670ustar00rootroot00000000000000// SPDX-License-Identifier: LGPL-2.1 /* * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "tracefs.h" #include "trace-cmd.h" #include "trace-cmd-local.h" #include "trace-write-local.h" #include "list.h" #include "trace-msg.h" /* We can't depend on the host size for size_t, all must be 64 bit */ typedef unsigned long long tsize_t; typedef long long stsize_t; struct tracecmd_option { unsigned short id; int size; void *data; tsize_t offset; struct list_head list; }; struct tracecmd_buffer { int cpus; void *name; tsize_t offset; struct tracecmd_option *option; struct list_head list; }; enum { OUTPUT_FL_SEND_META = (1 << 0), }; struct tracecmd_output { int fd; int page_size; int cpus; struct tep_handle *pevent; char *tracing_dir; char *kallsyms; int nr_options; bool quiet; unsigned long file_state; unsigned long file_version; /* size of meta-data strings, not yet stored in the file */ unsigned long strings_p; /* current virtual offset of meta-data string */ unsigned long strings_offs; unsigned long long options_start; unsigned long long options_next; bool big_endian; bool do_compress; struct tracecmd_compression *compress; struct list_head options; struct list_head buffers; struct tracecmd_msg_handle *msg_handle; char *trace_clock; /* meta-data strings, not yet stored in the file */ char *strings; }; struct list_event { struct list_event *next; char *name; char *file; }; struct list_event_system { struct list_event_system *next; struct list_event *events; char *name; }; #define HAS_SECTIONS(H) ((H)->file_version >= FILE_VERSION_SECTIONS) static int write_options(struct tracecmd_output *handle); static int 
save_string_section(struct tracecmd_output *handle, bool compress); __hidden long long do_write_check(struct tracecmd_output *handle, const void *data, long long size) { if (handle->do_compress) return tracecmd_compress_buffer_write(handle->compress, data, size); if (handle->msg_handle) return tracecmd_msg_data_send(handle->msg_handle, data, size); return __do_write_check(handle->fd, data, size); } static inline off_t do_lseek(struct tracecmd_output *handle, off_t offset, int whence) { if (handle->do_compress) return tracecmd_compress_lseek(handle->compress, offset, whence); if (handle->msg_handle) return msg_lseek(handle->msg_handle, offset, whence); return lseek(handle->fd, offset, whence); } static inline int do_preed(struct tracecmd_output *handle, void *dst, int len, off_t offset) { if (handle->do_compress) return tracecmd_compress_pread(handle->compress, dst, len, offset); return pread(handle->fd, dst, len, offset); } static short convert_endian_2(struct tracecmd_output *handle, short val) { if (!handle->pevent) return val; return tep_read_number(handle->pevent, &val, 2); } static int convert_endian_4(struct tracecmd_output *handle, int val) { if (!handle->pevent) return val; return tep_read_number(handle->pevent, &val, 4); } static unsigned long long convert_endian_8(struct tracecmd_output *handle, unsigned long long val) { if (!handle->pevent) return val; return tep_read_number(handle->pevent, &val, 8); } __hidden void out_compression_reset(struct tracecmd_output *handle, bool compress) { if (!compress || !handle->compress) return; tracecmd_compress_reset(handle->compress); handle->do_compress = false; } __hidden int out_uncompress_block(struct tracecmd_output *handle) { int ret = 0; if (!handle->compress) return 0; ret = tracecmd_uncompress_block(handle->compress); if (!ret) handle->do_compress = true; return ret; } __hidden int out_compression_start(struct tracecmd_output *handle, bool compress) { if (!compress || !handle->compress) return 0; 
tracecmd_compress_reset(handle->compress); handle->do_compress = true; return 0; } __hidden int out_compression_end(struct tracecmd_output *handle, bool compress) { if (!compress || !handle->compress) return 0; handle->do_compress = false; return tracecmd_compress_block(handle->compress); } static long add_string(struct tracecmd_output *handle, const char *string) { int size = strlen(string) + 1; int pos = handle->strings_p; char *strings; strings = realloc(handle->strings, pos + size); if (!strings) return -1; handle->strings = strings; memcpy(handle->strings + pos, string, size); handle->strings_p += size; return handle->strings_offs + pos; } /** * tracecmd_set_quiet - Set if to print output to the screen * @quiet: If non zero, print no output to the screen * */ void tracecmd_set_quiet(struct tracecmd_output *handle, bool set_quiet) { if (handle) handle->quiet = set_quiet; } void tracecmd_set_out_clock(struct tracecmd_output *handle, const char *clock) { if (handle && clock) { free(handle->trace_clock); handle->trace_clock = strdup(clock); } } /** * tracecmd_get_quiet - Get if to print output to the screen * Returns non zero, if no output to the screen should be printed * */ bool tracecmd_get_quiet(struct tracecmd_output *handle) { if (handle) return handle->quiet; return false; } void tracecmd_output_free(struct tracecmd_output *handle) { struct tracecmd_option *option; struct tracecmd_buffer *buffer; if (!handle) return; if (handle->tracing_dir) free(handle->tracing_dir); if (handle->pevent) tep_unref(handle->pevent); while (!list_empty(&handle->buffers)) { buffer = container_of(handle->buffers.next, struct tracecmd_buffer, list); list_del(&buffer->list); free(buffer->name); free(buffer); } while (!list_empty(&handle->options)) { option = container_of(handle->options.next, struct tracecmd_option, list); list_del(&option->list); free(option->data); free(option); } free(handle->strings); free(handle->trace_clock); tracecmd_compress_destroy(handle->compress); 
/*
 * Get the size of a file by reading it to the end.
 *
 * Used for files whose size can not be obtained with stat(). The file
 * offset of @fd is moved back to the start afterwards (the result of
 * that lseek() is not checked, as before).
 *
 * Returns the number of bytes that could be read from the current
 * offset of @fd. A read error other than EINTR ends the count early.
 */
static unsigned long get_size_fd(int fd)
{
	unsigned long long size = 0;
	char buf[BUFSIZ];
	ssize_t r;

	for (;;) {
		r = read(fd, buf, sizeof(buf));
		if (r > 0) {
			size += r;
			continue;
		}
		/* An interrupted read is not the end of the file */
		if (r < 0 && errno == EINTR)
			continue;
		break;
	}

	lseek(fd, 0, SEEK_SET);

	return size;
}
0; size = rsize; if (write_size) *write_size = wsize; } else { size = copy_file_fd(handle, fd, max); if (write_size) *write_size = size; } return size; } static tsize_t copy_file_compress(struct tracecmd_output *handle, const char *file, unsigned long long *write_size) { int ret; int fd; fd = open(file, O_RDONLY); if (fd < 0) { tracecmd_warning("Can't read '%s'", file); return 0; } ret = out_copy_fd_compress(handle, fd, 0, write_size, getpagesize()); if (!ret) tracecmd_warning("Can't compress '%s'", file); close(fd); return ret; } /* * Finds the path to the debugfs/tracing * Allocates the string and stores it. */ static const char *find_tracing_dir(struct tracecmd_output *handle) { if (!handle->tracing_dir) { const char *dir = tracefs_tracing_dir(); if (dir) handle->tracing_dir = strdup(dir); } return handle->tracing_dir; } static char *get_tracing_file(struct tracecmd_output *handle, const char *name) { const char *tracing; char *file; int ret; tracing = find_tracing_dir(handle); if (!tracing) return NULL; ret = asprintf(&file, "%s/%s", tracing, name); if (ret < 0) return NULL; return file; } static void put_tracing_file(char *file) { free(file); } int tracecmd_ftrace_enable(int set) { struct stat buf; char *path = "/proc/sys/kernel/ftrace_enabled"; int fd; char *val = set ? "1" : "0"; int ret = 0; /* if ftace_enable does not exist, simply ignore it */ fd = stat(path, &buf); if (fd < 0) return ENODEV; fd = open(path, O_WRONLY); if (fd < 0) { tracecmd_warning("Can't %s ftrace", set ? 
"enable" : "disable"); return EIO; } if (write(fd, val, 1) < 0) ret = -1; close(fd); return ret; } __hidden unsigned long long out_write_section_header(struct tracecmd_output *handle, unsigned short header_id, char *description, int flags, bool option) { tsize_t endian8; tsize_t offset; long long size; short endian2; int endian4; int desc; if (header_id >= TRACECMD_OPTION_MAX) return -1; if (!HAS_SECTIONS(handle)) return 0; if (!handle->compress) flags &= ~TRACECMD_SEC_FL_COMPRESS; offset = do_lseek(handle, 0, SEEK_CUR); if (option) { endian8 = convert_endian_8(handle, offset); if (!tracecmd_add_option(handle, header_id, 8, &endian8)) return -1; } /* Section ID */ endian2 = convert_endian_2(handle, header_id); if (do_write_check(handle, &endian2, 2)) return (off_t)-1; /* Section flags */ endian2 = convert_endian_2(handle, flags); if (do_write_check(handle, &endian2, 2)) return (off_t)-1; /* Section description */ if (description) desc = add_string(handle, description); else desc = -1; endian4 = convert_endian_4(handle, desc); if (do_write_check(handle, &endian4, 4)) return (off_t)-1; offset = do_lseek(handle, 0, SEEK_CUR); size = 0; /* Reserve for section size */ if (do_write_check(handle, &size, 8)) return (off_t)-1; return offset; } __hidden int out_update_section_header(struct tracecmd_output *handle, tsize_t offset) { tsize_t current; tsize_t endian8; tsize_t size; if (!HAS_SECTIONS(handle) || offset == 0) return 0; current = do_lseek(handle, 0, SEEK_CUR); /* The real size is the difference between the saved offset and * the current offset - 8 bytes, the reserved space for the section size. 
*/ size = current - offset; if (size < 8) return -1; size -= 8; if (do_lseek(handle, offset, SEEK_SET) == (off_t)-1) return -1; endian8 = convert_endian_8(handle, size); if (do_write_check(handle, &endian8, 8)) return -1; if (do_lseek(handle, current, SEEK_SET) == (off_t)-1) return -1; return 0; } static int save_string_section(struct tracecmd_output *handle, bool compress) { enum tracecmd_section_flags flags = 0; tsize_t offset; if (!handle->strings || !handle->strings_p) return 0; if (!check_out_state(handle, TRACECMD_OPTION_STRINGS)) { tracecmd_warning("Cannot write strings, unexpected state 0x%X", handle->file_state); return -1; } if (compress) flags |= TRACECMD_SEC_FL_COMPRESS; offset = out_write_section_header(handle, TRACECMD_OPTION_STRINGS, "strings", flags, false); if (offset == (off_t)-1) return -1; out_compression_start(handle, compress); if (do_write_check(handle, handle->strings, handle->strings_p)) goto error; if (out_compression_end(handle, compress)) goto error; if (out_update_section_header(handle, offset)) return -1; handle->strings_offs += handle->strings_p; free(handle->strings); handle->strings = NULL; handle->strings_p = 0; handle->file_state = TRACECMD_OPTION_STRINGS; return 0; error: out_compression_reset(handle, compress); return -1; } static int read_header_files(struct tracecmd_output *handle, bool compress) { enum tracecmd_section_flags flags = 0; tsize_t size, check_size, endian8; struct stat st; tsize_t offset; char *path; int fd = -1; int ret; if (!check_out_state(handle, TRACECMD_FILE_HEADERS)) { tracecmd_warning("Cannot read header files, unexpected state 0x%X", handle->file_state); return -1; } path = get_tracing_file(handle, "events/header_page"); if (!path) return -1; if (compress) flags |= TRACECMD_SEC_FL_COMPRESS; offset = out_write_section_header(handle, TRACECMD_OPTION_HEADER_INFO, "headers", flags, true); if (offset == (off_t)-1) { put_tracing_file(path); return -1; } out_compression_start(handle, compress); ret = stat(path, 
&st); if (ret < 0) { /* old style did not show this info, just add zero */ put_tracing_file(path); if (do_write_check(handle, "header_page", 12)) goto out_close; size = 0; if (do_write_check(handle, &size, 8)) goto out_close; if (do_write_check(handle, "header_event", 13)) goto out_close; if (do_write_check(handle, &size, 8)) goto out_close; if (out_compression_end(handle, compress)) goto out_close; if (out_update_section_header(handle, offset)) goto out_close; return 0; } fd = open(path, O_RDONLY); if (fd < 0) { tracecmd_warning("can't read '%s'", path); goto out_free; } /* unfortunately, you can not stat debugfs files for size */ size = get_size_fd(fd); if (do_write_check(handle, "header_page", 12)) goto out_free; endian8 = convert_endian_8(handle, size); if (do_write_check(handle, &endian8, 8)) goto out_free; check_size = copy_file_fd(handle, fd, 0); if (size != check_size) { tracecmd_warning("wrong size for '%s' size=%lld read=%lld", path, size, check_size); errno = EINVAL; goto out_free; } put_tracing_file(path); path = get_tracing_file(handle, "events/header_event"); if (!path) goto out_close; close(fd); fd = open(path, O_RDONLY); if (fd < 0) { tracecmd_warning("can't read '%s'", path); goto out_free; } size = get_size_fd(fd); if (do_write_check(handle, "header_event", 13)) goto out_free; endian8 = convert_endian_8(handle, size); if (do_write_check(handle, &endian8, 8)) goto out_free; check_size = copy_file_fd(handle, fd, 0); close(fd); if (size != check_size) { tracecmd_warning("wrong size for '%s'", path); goto out_free; } put_tracing_file(path); if (out_compression_end(handle, compress)) goto out_close; if (out_update_section_header(handle, offset)) goto out_close; handle->file_state = TRACECMD_FILE_HEADERS; return 0; out_free: put_tracing_file(path); out_close: out_compression_reset(handle, compress); if (fd >= 0) close(fd); return -1; } static int copy_event_system(struct tracecmd_output *handle, struct list_event_system *slist) { struct list_event 
*elist; unsigned long long size, check_size, endian8; struct stat st; char *format; int endian4; int count = 0; int ret; for (elist = slist->events; elist; elist = elist->next) count++; endian4 = convert_endian_4(handle, count); if (do_write_check(handle, &endian4, 4)) return -1; for (elist = slist->events; elist; elist = elist->next) { format = elist->file; ret = stat(format, &st); if (ret >= 0) { /* unfortunately, you can not stat debugfs files for size */ size = get_size(format); endian8 = convert_endian_8(handle, size); if (do_write_check(handle, &endian8, 8)) return -1; check_size = copy_file(handle, format); if (size != check_size) { tracecmd_warning("error in size of file '%s'", format); return -1; } } } return 0; } static void add_list_event_system(struct list_event_system **systems, const char *system, const char *event, const char *path) { struct list_event_system *slist; struct list_event *elist; for (slist = *systems; slist; slist = slist->next) if (strcmp(slist->name, system) == 0) break; if (!slist) { slist = malloc(sizeof(*slist)); if (!slist) goto err_mem; slist->name = strdup(system); if (!slist->name) { free(slist); goto err_mem; } slist->next = *systems; slist->events = NULL; *systems = slist; } for (elist = slist->events; elist; elist = elist->next) if (strcmp(elist->name, event) == 0) break; if (!elist) { elist = malloc(sizeof(*elist)); if (!elist) goto err_mem; elist->name = strdup(event); elist->file = strdup(path); if (!elist->name || !elist->file) { free(elist->name); free(elist->file); free(elist); goto err_mem; } elist->next = slist->events; slist->events = elist; } return; err_mem: tracecmd_warning("Insufficient memory"); } static void free_list_events(struct list_event_system *list) { struct list_event_system *slist; struct list_event *elist; while (list) { slist = list; list = list->next; while (slist->events) { elist = slist->events; slist->events = elist->next; free(elist->name); free(elist->file); free(elist); } free(slist->name); 
/*
 * glob_events - add every event format file matching @str to @systems
 * @handle: output handle, used to locate the tracing directory
 * @systems: list of event systems the matches are added to
 * @str: glob pattern of the form "<system>/<event>"
 *
 * Expands "<events dir>/@str/format" and registers each match under its
 * system and event name. Events of the "ftrace" system are only added
 * when @str itself starts with "ftrace/". Errors are not reported to the
 * caller; on failure the list is simply left without the missing entries.
 */
static void glob_events(struct tracecmd_output *handle,
			struct list_event_system **systems,
			const char *str)
{
	glob_t globbuf;
	char *events_path;
	char *system;
	char *event;
	char *path;
	char *file;
	char *name;
	char *ptr;
	int do_ftrace = 0;
	size_t events_len;
	size_t i;
	int ret;

	if (strncmp(str, "ftrace/", 7) == 0)
		do_ftrace = 1;

	events_path = get_tracing_file(handle, "events");
	if (!events_path)
		return;
	events_len = strlen(events_path);

	/* + 2 for the '/' separator and the terminating '\0' */
	path = malloc(events_len + strlen(str) + strlen("/format") + 2);
	if (!path) {
		put_tracing_file(events_path);
		return;
	}
	path[0] = '\0';
	strcat(path, events_path);
	strcat(path, "/");
	strcat(path, str);
	strcat(path, "/format");
	put_tracing_file(events_path);

	/* Start from a clean state so globfree() is safe on every path */
	memset(&globbuf, 0, sizeof(globbuf));
	ret = glob(path, 0, NULL, &globbuf);
	free(path);
	/* glob() reports failures (including "no match") as non-zero codes */
	if (ret) {
		globfree(&globbuf);
		return;
	}

	for (i = 0; i < globbuf.gl_pathc; i++) {
		file = globbuf.gl_pathv[i];

		/* Work on a copy, strtok_r() modifies the string it parses */
		name = strdup(file + events_len + 1);
		if (!name)
			break;	/* out of memory, stop adding events */

		system = strtok_r(name, "/", &ptr);
		event = system ? strtok_r(NULL, "/", &ptr) : NULL;

		/* Skip malformed paths and, unless asked for, ftrace events */
		if (system && event &&
		    (do_ftrace || strcmp(system, "ftrace") != 0))
			add_list_event_system(systems, system, event, file);

		/* Free the allocation itself, not the token pointer */
		free(name);
	}
	globfree(&globbuf);
}
tracecmd_event_list *event_list) { struct list_event_system *systems = NULL; struct tracecmd_event_list *list; for (list = event_list; list; list = list->next) create_event_list_item(handle, &systems, list); return systems; } static int read_event_files(struct tracecmd_output *handle, struct tracecmd_event_list *event_list, bool compress) { enum tracecmd_section_flags flags = 0; struct list_event_system *systems; struct list_event_system *slist; struct tracecmd_event_list *list; struct tracecmd_event_list all_events = { .glob = "*/*" }; int count = 0; tsize_t offset; int endian4; int ret; if (!check_out_state(handle, TRACECMD_FILE_ALL_EVENTS)) { tracecmd_warning("Cannot read event files, unexpected state 0x%X", handle->file_state); return -1; } if (compress) flags |= TRACECMD_SEC_FL_COMPRESS; offset = out_write_section_header(handle, TRACECMD_OPTION_EVENT_FORMATS, "events format", flags, true); if (offset == (off_t)-1) return -1; /* * If any of the list is the special keyword "all" then * just do all files. 
#define KPTR_UNINITIALIZED 'X'

/*
 * set_proc_kptr_restrict - save-and-clear or restore kptr_restrict
 * @reset: zero to remember the current setting and write '0' instead;
 *	   non-zero to write back the setting remembered by an earlier call
 *
 * Does nothing when asked to restore without a previously saved value, or
 * when the proc file does not exist. Any other failure only produces a
 * warning.
 */
static void set_proc_kptr_restrict(int reset)
{
	const char *path = "/proc/sys/kernel/kptr_restrict";
	static char saved = KPTR_UNINITIALIZED;
	int fd = -1, ret = -1;
	struct stat st;
	char buf;

	if ((reset && saved == KPTR_UNINITIALIZED) || stat(path, &st) < 0)
		return;

	if (reset) {
		/* Restoring needs no read, just write the saved value back */
		buf = saved;
	} else {
		fd = open(path, O_RDONLY);
		if (fd < 0)
			goto err;
		/* A short (0 byte) read would leave buf uninitialized */
		if (read(fd, &buf, 1) != 1)
			goto err;
		close(fd);
		fd = -1;
		saved = buf;
		buf = '0';
	}

	fd = open(path, O_WRONLY);
	if (fd < 0)
		goto err;
	if (write(fd, &buf, 1) > 0)
		ret = 0;
err:
	if (fd >= 0)
		close(fd);
	if (ret)
		tracecmd_warning("can't set kptr_restrict");
}
handle->kallsyms; if (compress) flags |= TRACECMD_SEC_FL_COMPRESS; offset = out_write_section_header(handle, TRACECMD_OPTION_KALLSYMS, "kallsyms", flags, true); if (offset == (off_t)-1) return -1; out_compression_start(handle, compress); ret = stat(path, &st); if (ret < 0) { /* not found */ size = 0; endian4 = convert_endian_4(handle, size); ret = do_write_check(handle, &endian4, 4); goto out; } size = get_size(path); endian4 = convert_endian_4(handle, size); ret = do_write_check(handle, &endian4, 4); if (ret) goto out; set_proc_kptr_restrict(0); check_size = copy_file(handle, path); if (size != check_size) { errno = EINVAL; tracecmd_warning("error in size of file '%s'", path); set_proc_kptr_restrict(1); ret = -1; goto out; } set_proc_kptr_restrict(1); ret = out_compression_end(handle, compress); if (ret) goto out; ret = out_update_section_header(handle, offset); out: if (!ret) handle->file_state = TRACECMD_FILE_KALLSYMS; else out_compression_reset(handle, compress); return ret; } static int read_ftrace_printk(struct tracecmd_output *handle, bool compress) { enum tracecmd_section_flags flags = 0; unsigned int size, check_size, endian4; tsize_t offset; struct stat st; char *path; int ret; if (!check_out_state(handle, TRACECMD_FILE_PRINTK)) { tracecmd_warning("Cannot read printk, unexpected state 0x%X", handle->file_state); return -1; } path = get_tracing_file(handle, "printk_formats"); if (!path) return -1; if (compress) flags |= TRACECMD_SEC_FL_COMPRESS; offset = out_write_section_header(handle, TRACECMD_OPTION_PRINTK, "printk", flags, true); if (offset == (off_t)-1) { put_tracing_file(path); return -1; } out_compression_start(handle, compress); ret = stat(path, &st); if (ret < 0) { /* not found */ size = 0; endian4 = convert_endian_4(handle, size); if (do_write_check(handle, &endian4, 4)) goto fail; goto out; } size = get_size(path); endian4 = convert_endian_4(handle, size); if (do_write_check(handle, &endian4, 4)) goto fail; check_size = copy_file(handle, path); 
if (size != check_size) { errno = EINVAL; tracecmd_warning("error in size of file '%s'", path); goto fail; } out: put_tracing_file(path); if (out_compression_end(handle, compress)) return -1; if (out_update_section_header(handle, offset)) return -1; handle->file_state = TRACECMD_FILE_PRINTK; return 0; fail: put_tracing_file(path); out_compression_reset(handle, compress); return -1; } static int save_tracing_file_data(struct tracecmd_output *handle, const char *filename) { unsigned long long endian8; char *file = NULL; struct stat st; off_t check_size; off_t size; int ret = -1; file = get_tracing_file(handle, filename); if (!file) return -1; ret = stat(file, &st); if (ret >= 0) { size = get_size(file); endian8 = convert_endian_8(handle, size); if (do_write_check(handle, &endian8, 8)) goto out_free; check_size = copy_file(handle, file); if (size != check_size) { errno = EINVAL; tracecmd_warning("error in size of file '%s'", file); goto out_free; } } else { size = 0; endian8 = convert_endian_8(handle, size); if (do_write_check(handle, &endian8, 8)) goto out_free; } ret = 0; out_free: put_tracing_file(file); return ret; } static int write_compression_header(struct tracecmd_output *handle) { const char *name = NULL; const char *ver = NULL; int ret; ret = tracecmd_compress_proto_get_name(handle->compress, &name, &ver); if (ret < 0 || !name || !ver) { name = "none"; ver = ""; } if (do_write_check(handle, name, strlen(name) + 1)) return -1; if (do_write_check(handle, ver, strlen(ver) + 1)) return -1; return 0; } static int get_trace_page_size(struct tracecmd_output *handle, const char *name) { struct tracefs_instance *instance; struct tep_handle *tep = NULL; int psize, size; char *buff = NULL; /* In case of an error, return user space page size */ psize = getpagesize(); instance = tracefs_instance_alloc(find_tracing_dir(handle), name); if (!instance) goto out; buff = tracefs_instance_file_read(instance, "events/header_page", &size); if (!buff) goto out; tep = tep_alloc(); 
if (!tep) goto out; if (tep_parse_header_page(tep, buff, size, sizeof(long long))) goto out; psize = tep_get_sub_buffer_size(tep); out: tracefs_instance_free(instance); tep_free(tep); free(buff); return psize; } /** * tracecmd_output_create_fd - allocate new output handle to a trace file * @fd: File descriptor for the handle to write to. * * Allocate a tracecmd_output descriptor and perform minimal initialization. * @fd will be set as the file descriptor for the handle. Nothing is * written in the file yet, and if @fd is -1, then all writes will be ignored. * * Returns a pointer to a newly allocated file descriptor for the use of creating * a tracecmd data file. In case of an error, NULL is returned. The returned * handle must be freed with tracecmd_output_close() or tracecmd_output_free() */ struct tracecmd_output *tracecmd_output_create_fd(int fd) { struct tracecmd_output *handle; handle = calloc(1, sizeof(*handle)); if (!handle) return NULL; handle->fd = fd; handle->file_version = FILE_VERSION_DEFAULT; handle->page_size = get_trace_page_size(handle, NULL); handle->big_endian = tracecmd_host_bigendian(); list_head_init(&handle->options); list_head_init(&handle->buffers); handle->file_state = TRACECMD_FILE_ALLOCATED; return handle; } /** * tracecmd_output_set_msg - associated an output file handle with network message handle * @handle: output handle to a trace file. * @msg_handle: network handle, allocated by tracecmd_msg_handle_alloc() * * Associate an output file handle (@handle) to a network stream (@msg_handle). * All subsequent calls to @handle will send data over the network using @msg_handle * instead of writing to a file. * * This must be called after the handle file version is set and before calling * tracecmd_output_write_headers(). * * Returns 0 on success, or -1 if the output file handle is not allocated or not * in the expected state. 
/**
 * tracecmd_output_set_trace_dir - Set a custom tracing dir, instead of system default
 * @handle: output handle to a trace file.
 * @tracing_dir: full path to a directory with tracing files, or NULL to
 *		 drop a previously set directory
 *
 * Associate the output file handle (@handle) with a custom tracing directory
 * (@tracing_dir), to be used when creating the trace file instead of using the
 * system default tracing directory. The path is copied, the caller keeps
 * ownership of @tracing_dir.
 *
 * Must be called before tracecmd_output_write_headers().
 *
 * Returns 0 on success, or -1 if the output file handle is not allocated,
 * not in the expected state, or memory could not be allocated. On failure
 * the previously set directory is left unchanged.
 */
int tracecmd_output_set_trace_dir(struct tracecmd_output *handle, const char *tracing_dir)
{
	char *dir = NULL;

	if (!handle || handle->file_state != TRACECMD_FILE_ALLOCATED)
		return -1;

	/*
	 * Duplicate before freeing the old value: @tracing_dir may point to
	 * the string currently stored in the handle, and a failed allocation
	 * must not lose the previous setting.
	 */
	if (tracing_dir) {
		dir = strdup(tracing_dir);
		if (!dir)
			return -1;
	}

	free(handle->tracing_dir);
	handle->tracing_dir = dir;

	return 0;
}
*/ int tracecmd_output_set_kallsyms(struct tracecmd_output *handle, const char *kallsyms) { if (!handle || handle->file_state != TRACECMD_FILE_ALLOCATED) return -1; free(handle->kallsyms); if (kallsyms) { handle->kallsyms = strdup(kallsyms); if (!handle->kallsyms) return -1; } else handle->kallsyms = NULL; return 0; } /** * tracecmd_output_set_from_input - Inherit parameters from an existing trace file * @handle: output handle to a trace file. * @ihandle: input handle to an existing trace file. * * Have the output file handle (@handle) inherit the properties of a given * input file handle (@ihandle). * * The parameters that are copied are: * - tep handle * - page size * - file endian * - file version * - file compression protocol * * Must be called before tracecmd_output_write_headers(). * * Returns 0 on success, or -1 if the output file handle is not allocated or * not in expected state. */ int tracecmd_output_set_from_input(struct tracecmd_output *handle, struct tracecmd_input *ihandle) { const char *cname = NULL; const char *cver = NULL; if (!handle || !ihandle || handle->file_state != TRACECMD_FILE_ALLOCATED) return -1; /* get endian, page size, file version and compression */ /* Use the pevent of the ihandle for later writes */ handle->pevent = tracecmd_get_tep(ihandle); tep_ref(handle->pevent); handle->page_size = tracecmd_page_size(ihandle); handle->file_version = tracecmd_get_in_file_version(ihandle); handle->big_endian = tep_is_file_bigendian(handle->pevent); if (!tracecmd_get_file_compress_proto(ihandle, &cname, &cver)) { handle->compress = tracecmd_compress_alloc(cname, cver, handle->fd, handle->pevent, handle->msg_handle); if (!handle->compress) return -1; if (handle->file_version < FILE_VERSION_COMPRESSION) handle->file_version = FILE_VERSION_COMPRESSION; } return 0; } /** * tracecmd_output_set_version - Set file version of the output handle * @handle: output handle to a trace file. 
* @file_version: desired file version * * This API must be called before tracecmd_output_write_headers(). * * Returns 0 on success, or -1 if the output file handle is not allocated or not in expected state. */ int tracecmd_output_set_version(struct tracecmd_output *handle, int file_version) { if (!handle || handle->file_state != TRACECMD_FILE_ALLOCATED) return -1; if (file_version < FILE_VERSION_MIN || file_version > FILE_VERSION_MAX) return -1; handle->file_version = file_version; if (handle->file_version < FILE_VERSION_COMPRESSION) handle->compress = NULL; return 0; } /** * tracecmd_output_set_compression - Set file compression algorithm of the output handle * @handle: output handle to a trace file. * @compression: name of the desired compression algorithm. Can be one of: * - "none" - do not use compression * - "all" - use the best available compression algorithm * - or specific name of the desired compression algorithm * * This API must be called before tracecmd_output_write_headers(). * * Returns 0 on success, or -1 in case of an error: * - the output file handle is not allocated or not in expected state. 
* - the specified compression algorithm is not available */ int tracecmd_output_set_compression(struct tracecmd_output *handle, const char *compression) { if (!handle || handle->file_state != TRACECMD_FILE_ALLOCATED) return -1; handle->compress = NULL; if (compression && strcmp(compression, "none")) { if (!strcmp(compression, "any")) { handle->compress = tracecmd_compress_alloc(NULL, NULL, handle->fd, handle->pevent, handle->msg_handle); if (!handle->compress) tracecmd_warning("No compression algorithms are supported"); } else { handle->compress = tracecmd_compress_alloc(compression, NULL, handle->fd, handle->pevent, handle->msg_handle); if (!handle->compress) { tracecmd_warning("Compression algorithm %s is not supported", compression); return -1; } } } if (handle->compress && handle->file_version < FILE_VERSION_COMPRESSION) { handle->file_version = FILE_VERSION_COMPRESSION; if (handle->msg_handle) tracecmd_msg_handle_cache(handle->msg_handle); } return 0; } /** * output_write_init - Write the initial data into the trace file * @handle: output handle to a trace file. * * Must be called after tracecmd_output_set_*() functions and before writing * anything else. * * The initial information to be written into the file: * - initial file magic bytes * - file version * - data endian * - long size * - page size * - compression header * * Returns 0 on success, or -1 if the output file handle is not allocated or * not in the expected state. 
*/ static int output_write_init(struct tracecmd_output *handle) { unsigned long long offset; char buf[BUFSIZ]; int endian4; if (!handle || handle->file_state != TRACECMD_FILE_ALLOCATED) return -1; buf[0] = 23; buf[1] = 8; buf[2] = 68; memcpy(buf + 3, "tracing", 7); if (do_write_check(handle, buf, 10)) return -1; sprintf(buf, "%lu", handle->file_version); if (do_write_check(handle, buf, strlen(buf) + 1)) return -1; if (handle->big_endian) buf[0] = 1; else buf[0] = 0; if (do_write_check(handle, buf, 1)) return -1; /* save size of long (this may not be what the kernel is) */ buf[0] = sizeof(long); if (do_write_check(handle, buf, 1)) return -1; endian4 = convert_endian_4(handle, handle->page_size); if (do_write_check(handle, &endian4, 4)) return -1; if (handle->file_version >= FILE_VERSION_COMPRESSION) { if (write_compression_header(handle)) return -1; } if (HAS_SECTIONS(handle)) { /* Write 0 as options offset and save its location */ offset = 0; handle->options_start = do_lseek(handle, 0, SEEK_CUR); if (do_write_check(handle, &offset, 8)) return -1; } handle->file_state = TRACECMD_FILE_INIT; return 0; } /** * tracecmd_output_write_headers - Write the trace file headers * @handle: output handle to a trace file. * @list: desired events that will be included in the trace file. * It can be NULL for all available events * * These headers are written in the file: * - header files from the tracing directory * - ftrace events from the tracing directory * - event file from the tracing directory - all or only the one from @list * - kernel symbols from the tracing directory * - kernel printk strings from the tracing directory * * Returns 0 on success, or -1 in case of an error. 
*/ int tracecmd_output_write_headers(struct tracecmd_output *handle, struct tracecmd_event_list *list) { bool compress = false; if (!handle || handle->file_state < TRACECMD_FILE_ALLOCATED) return -1; /* Write init data, if not written yet */ if (handle->file_state < TRACECMD_FILE_INIT && output_write_init(handle)) return -1; if (handle->compress) compress = true; if (read_header_files(handle, compress)) return -1; if (read_ftrace_files(handle, compress)) return -1; if (read_event_files(handle, list, compress)) return -1; if (read_proc_kallsyms(handle, compress)) return -1; if (read_ftrace_printk(handle, compress)) return -1; return 0; } /** * tracecmd_add_option_v - add options to the file * @handle: the output file handle name * @id: the id of the option * @size: the size of the option data * @data: the data to write to the file * @vector: array of vectors, pointing to the data to write in the file * @count: number of items in the vector array * * * Returns handle to update option if needed. * Just the content can be updated, with smaller or equal to * content than the specified size. */ struct tracecmd_option * tracecmd_add_option_v(struct tracecmd_output *handle, unsigned short id, const struct iovec *vector, int count) { struct tracecmd_option *option; char *data = NULL; int i, size = 0; /* * We can only add options before tracing data were written. * This may change in the future. 
*/ if (!HAS_SECTIONS(handle) && handle->file_state > TRACECMD_FILE_OPTIONS) return NULL; for (i = 0; i < count; i++) size += vector[i].iov_len; /* Some IDs (like TRACECMD_OPTION_TRACECLOCK) pass vector with 0 / NULL data */ if (size) { data = malloc(size); if (!data) { tracecmd_warning("Insufficient memory"); return NULL; } } option = calloc(1, sizeof(*option)); if (!option) { tracecmd_warning("Could not allocate space for option"); free(data); return NULL; } handle->nr_options++; option->data = data; for (i = 0; i < count; i++) { if (vector[i].iov_base && vector[i].iov_len) { memcpy(data, vector[i].iov_base, vector[i].iov_len); data += vector[i].iov_len; } } option->size = size; option->id = id; list_add_tail(&option->list, &handle->options); return option; } /** * tracecmd_add_option - add options to the file * @handle: the output file handle name * @id: the id of the option * @size: the size of the option data * @data: the data to write to the file * * Returns handle to update option if needed * Just the content can be updated, with smaller or equal to * content than the specified size */ struct tracecmd_option * tracecmd_add_option(struct tracecmd_output *handle, unsigned short id, int size, const void *data) { struct iovec vect; vect.iov_base = (void *) data; vect.iov_len = size; return tracecmd_add_option_v(handle, id, &vect, 1); } int tracecmd_write_cpus(struct tracecmd_output *handle, int cpus) { int ret; if (!check_out_state(handle, TRACECMD_FILE_CPU_COUNT)) { tracecmd_warning("Cannot write CPU count into the file, unexpected state 0x%X", handle->file_state); return -1; } if (!HAS_SECTIONS(handle)) { cpus = convert_endian_4(handle, cpus); ret = do_write_check(handle, &cpus, 4); if (ret < 0) return ret; } else { tracecmd_add_option(handle, TRACECMD_OPTION_CPUCOUNT, sizeof(int), &cpus); } handle->file_state = TRACECMD_FILE_CPU_COUNT; return 0; } static int write_options_v6(struct tracecmd_output *handle) { struct tracecmd_option *options; unsigned short 
option; unsigned short endian2; unsigned int endian4; /* If already written, ignore */ if (handle->file_state == TRACECMD_FILE_OPTIONS) return 0; if (!check_out_state(handle, TRACECMD_FILE_OPTIONS)) { tracecmd_warning("Cannot write options into the file, unexpected state 0x%X", handle->file_state); return -1; } if (do_write_check(handle, "options ", 10)) return -1; handle->options_start = do_lseek(handle, 0, SEEK_CUR); list_for_each_entry(options, &handle->options, list) { endian2 = convert_endian_2(handle, options->id); if (do_write_check(handle, &endian2, 2)) return -1; endian4 = convert_endian_4(handle, options->size); if (do_write_check(handle, &endian4, 4)) return -1; /* Save the data location in case it needs to be updated */ options->offset = do_lseek(handle, 0, SEEK_CUR); if (do_write_check(handle, options->data, options->size)) return -1; } option = TRACECMD_OPTION_DONE; if (do_write_check(handle, &option, 2)) return -1; handle->file_state = TRACECMD_FILE_OPTIONS; return 0; } static int update_options_start(struct tracecmd_output *handle, off_t offset) { if (do_lseek(handle, handle->options_start, SEEK_SET) == (off_t)-1) return -1; offset = convert_endian_8(handle, offset); if (do_write_check(handle, &offset, 8)) return -1; return 0; } /** * tracecmd_pepare_options - perpare a previous options for the next * @handle: The handle to update the options for. * @offset: The offset to set the previous options to. * @whence: Where in the file to offset from. * * In a case of cached writes for network access, the options offset * cannot be written once it goes over the network. This is used * to update the next options to a known location. * * tracecmd_write_options() must be called when the offset is at the next * location, otherwise the data file will end up corrupted. * * Returns zero on success and -1 on error. */ int tracecmd_prepare_options(struct tracecmd_output *handle, off_t offset, int whence) { tsize_t curr; int ret; /* No options to start with? 
*/ if (!handle->options_start) return 0; curr = do_lseek(handle, 0, SEEK_CUR); switch (whence) { case SEEK_SET: /* just use offset */ break; case SEEK_CUR: offset += curr; break; case SEEK_END: offset = do_lseek(handle, offset, SEEK_END); if (offset == (off_t)-1) return -1; break; } ret = update_options_start(handle, offset); if (ret < 0) return -1; handle->options_next = offset; curr = do_lseek(handle, curr, SEEK_SET); return curr == -1 ? -1 : 0; } static tsize_t write_options_start(struct tracecmd_output *handle) { tsize_t offset; int ret; offset = do_lseek(handle, 0, SEEK_CUR); if (handle->options_next) { /* options_start was already updated */ if (handle->options_next != offset) { tracecmd_warning("Options offset (%lld) does not match expected (%lld)", offset, handle->options_next); return -1; } handle->options_next = 0; /* Will be updated at the end */ handle->options_start = 0; } /* Append to the previous options section, if any */ if (handle->options_start) { ret = update_options_start(handle, offset); if (ret < 0) return -1; offset = do_lseek(handle, offset, SEEK_SET); if (offset == (off_t)-1) return -1; } return out_write_section_header(handle, TRACECMD_OPTION_DONE, "options", 0, false); } static tsize_t write_options_end(struct tracecmd_output *handle, tsize_t offset) { unsigned long long endian8; unsigned short endian2; unsigned int endian4; endian2 = convert_endian_2(handle, TRACECMD_OPTION_DONE); if (do_write_check(handle, &endian2, 2)) return -1; endian4 = convert_endian_4(handle, 8); if (do_write_check(handle, &endian4, 4)) return -1; endian8 = 0; handle->options_start = do_lseek(handle, 0, SEEK_CUR); if (do_write_check(handle, &endian8, 8)) return -1; if (out_update_section_header(handle, offset)) return -1; return 0; } static int write_options(struct tracecmd_output *handle) { struct tracecmd_option *options; unsigned short endian2; unsigned int endian4; bool new = false; tsize_t offset; /* Check if there are unsaved options */ 
list_for_each_entry(options, &handle->options, list) { if (!options->offset) { new = true; break; } } /* * Even if there are no new options, if options_next is set, it requires * adding a new empty options section as the previous one already * points to it. */ if (!new && !handle->options_next) return 0; offset = write_options_start(handle); if (offset == (off_t)-1) return -1; list_for_each_entry(options, &handle->options, list) { /* Option is already saved, skip it */ if (options->offset) continue; endian2 = convert_endian_2(handle, options->id); if (do_write_check(handle, &endian2, 2)) return -1; endian4 = convert_endian_4(handle, options->size); if (do_write_check(handle, &endian4, 4)) return -1; /* Save the data location */ options->offset = do_lseek(handle, 0, SEEK_CUR); if (do_write_check(handle, options->data, options->size)) return -1; } return write_options_end(handle, offset); } /** * trace_get_options - Get the current options from the output file handle * @handle: The output file descriptor that has options. * @len: Returns the length of the buffer allocated and returned. * * Reads the options that have not been written to the file yet, * puts them into an allocated buffer and sets @len to the size * added. Used by trace-msg.c to send options over the network. * * Note, the options cannot be referenced again once this is called. * New options can be added and referenced. * * Returns an allocated buffer (must be freed with free()) that contains * the options to send, with @len set to the size of the content. * NULL on error (and @len is undefined). 
*/ __hidden void *trace_get_options(struct tracecmd_output *handle, size_t *len) { struct tracecmd_msg_handle msg_handle; struct tracecmd_output out_handle; struct tracecmd_option *options; unsigned short endian2; unsigned int endian4; tsize_t offset; void *buf = NULL; /* Use the msg_cache as our output */ memset(&msg_handle, 0, sizeof(msg_handle)); msg_handle.cfd = -1; if (tracecmd_msg_handle_cache(&msg_handle) < 0) return NULL; out_handle = *handle; out_handle.fd = msg_handle.cfd; out_handle.msg_handle = &msg_handle; list_for_each_entry(options, &handle->options, list) { /* Option is already saved, skip it */ if (options->offset) continue; endian2 = convert_endian_2(handle, options->id); if (do_write_check(&out_handle, &endian2, 2)) goto out; endian4 = convert_endian_4(handle, options->size); if (do_write_check(&out_handle, &endian4, 4)) goto out; /* The option can not be referenced again */ options->offset = -1; if (do_write_check(&out_handle, options->data, options->size)) goto out; } offset = do_lseek(&out_handle, 0, SEEK_CUR); buf = malloc(offset); if (!buf) goto out; if (do_lseek(&out_handle, 0, SEEK_SET) == (off_t)-1) goto out; *len = read(msg_handle.cfd, buf, offset); if (*len != offset) { free(buf); buf = NULL; } out: close(msg_handle.cfd); return buf; } /** * trace_append_options - Append options to the file * @handle: The output file descriptor that has options. * @buf: The options to append. * @len: The length of @buf. * * Will add an options section header for the content of @buf to * be written as options into the @handle. * Used by trace-msg.c to retrieve options over the network. * * Returns 0 on success and -1 on error. 
*/ __hidden int trace_append_options(struct tracecmd_output *handle, void *buf, size_t len) { tsize_t offset; offset = write_options_start(handle); if (offset == (off_t)-1) return -1; if (do_write_check(handle, buf, len)) return -1; return write_options_end(handle, offset); } int tracecmd_write_meta_strings(struct tracecmd_output *handle) { if (!HAS_SECTIONS(handle)) return 0; return save_string_section(handle, true); } int tracecmd_write_options(struct tracecmd_output *handle) { if (!HAS_SECTIONS(handle)) return write_options_v6(handle); return write_options(handle); } static int append_options_v6(struct tracecmd_output *handle) { struct tracecmd_option *options; unsigned short option; unsigned short endian2; unsigned int endian4; off_t offset; int r; /* * We can append only if options are already written and tracing data * is not yet written */ if (handle->file_state != TRACECMD_FILE_OPTIONS) return -1; if (do_lseek(handle, 0, SEEK_END) == (off_t)-1) return -1; offset = do_lseek(handle, -2, SEEK_CUR); if (offset == (off_t)-1) return -1; r = do_preed(handle, &option, 2, offset); if (r != 2 || option != TRACECMD_OPTION_DONE) return -1; list_for_each_entry(options, &handle->options, list) { endian2 = convert_endian_2(handle, options->id); if (do_write_check(handle, &endian2, 2)) return -1; endian4 = convert_endian_4(handle, options->size); if (do_write_check(handle, &endian4, 4)) return -1; /* Save the data location in case it needs to be updated */ options->offset = do_lseek(handle, 0, SEEK_CUR); if (do_write_check(handle, options->data, options->size)) return -1; } option = TRACECMD_OPTION_DONE; if (do_write_check(handle, &option, 2)) return -1; return 0; } int tracecmd_append_options(struct tracecmd_output *handle) { if (!HAS_SECTIONS(handle)) return append_options_v6(handle); return write_options(handle); } static struct tracecmd_option * add_buffer_option_v6(struct tracecmd_output *handle, const char *name, int cpus) { struct tracecmd_option *option; char *buf; 
int size = 8 + strlen(name) + 1;

	/* Option data: 8 bytes, followed by the NUL terminated buffer name */
	buf = calloc(1, size);
	if (!buf) {
		tracecmd_warning("Failed to malloc buffer");
		return NULL;
	}
	/* The leading 8 bytes start out as zero */
	*(tsize_t *)buf = 0;
	strcpy(buf + 8, name);

	/* buf is freed right after the call, so it is not kept by reference here */
	option = tracecmd_add_option(handle, TRACECMD_OPTION_BUFFER, size, buf);
	free(buf);

	/*
	 * In case a buffer instance has different number of CPUs as the
	 * local machine.
	 */
	if (cpus)
		tracecmd_add_option(handle, TRACECMD_OPTION_CPUCOUNT,
				    sizeof(int), &cpus);

	return option;
}

/**
 * tracecmd_add_buffer_info - remember a buffer in the output handle
 * @handle: output handle
 * @name: name of the buffer, duplicated with strdup()
 * @cpus: CPU count stored with the buffer
 *
 * Appends a new entry to handle->buffers.
 *
 * Returns 0 on success, or -1 if an allocation fails.
 */
int tracecmd_add_buffer_info(struct tracecmd_output *handle, const char *name, int cpus)
{
	struct tracecmd_buffer *buf;

	buf = calloc(1, sizeof(struct tracecmd_buffer));
	if (!buf)
		return -1;
	buf->name = strdup(name);
	buf->cpus = cpus;
	if (!buf->name) {
		free(buf);
		return -1;
	}
	list_add_tail(&buf->list, &handle->buffers);
	return 0;
}

/**
 * tracecmd_write_buffer_info - add the buffer options of a file without sections
 * @handle: output handle
 *
 * Does nothing for files with sections. Otherwise adds one buffer option
 * for every entry of handle->buffers and stores it in the entry.
 *
 * Returns 0 on success, or -1 if an option cannot be added.
 */
int tracecmd_write_buffer_info(struct tracecmd_output *handle)
{
	struct tracecmd_option *option;
	struct tracecmd_buffer *buf;

	if (HAS_SECTIONS(handle))
		return 0;

	list_for_each_entry(buf, &handle->buffers, list) {
		option = add_buffer_option_v6(handle, buf->name, buf->cpus);
		if (!option)
			return -1;
		buf->option = option;
	}

	return 0;
}

/*
 * Return the offset stored in the option of the first buffer called @name,
 * or 0 if there is no such buffer or it has no option attached yet.
 */
static tsize_t get_buffer_file_offset(struct tracecmd_output *handle, const char *name)
{
	struct tracecmd_buffer *buf;

	list_for_each_entry(buf, &handle->buffers, list) {
		if (!strcmp(name, buf->name)) {
			if (!buf->option)
				break;
			return buf->option->offset;
		}
	}
	return 0;
}

/**
 * tracecmd_write_cmdlines - write the "saved_cmdlines" tracing file
 * @handle: output handle
 *
 * The data is written as a "command lines" section, compressed if the
 * handle has a compression context.
 *
 * Returns 0 on success, or a negative value in case of an error.
 */
int tracecmd_write_cmdlines(struct tracecmd_output *handle)
{
	enum tracecmd_section_flags flags = 0;
	bool compress = false;
	tsize_t offset;
	int ret;

	if (!check_out_state(handle, TRACECMD_FILE_CMD_LINES)) {
		tracecmd_warning("Cannot write command lines into the file, unexpected state 0x%X",
				 handle->file_state);
		return -1;
	}

	if (handle->compress)
		compress = true;

	if (compress)
		flags |= TRACECMD_SEC_FL_COMPRESS;
	offset = out_write_section_header(handle, TRACECMD_OPTION_CMDLINES,
					  "command lines", flags, true);
	if (offset == (off_t)-1)
		return -1;

	out_compression_start(handle, compress);

	ret =
save_tracing_file_data(handle, "saved_cmdlines");
	if (ret < 0) {
		/* Undo the compression started above before bailing out */
		out_compression_reset(handle, compress);
		return ret;
	}

	if (out_compression_end(handle, compress))
		return -1;

	if (out_update_section_header(handle, offset))
		return -1;

	handle->file_state = TRACECMD_FILE_CMD_LINES;
	return 0;
}

/*
 * Return the trace clock of the handle. The value is looked up once and
 * cached in handle->trace_clock. May return NULL.
 */
static char *get_clock(struct tracecmd_output *handle)
{
	struct tracefs_instance *inst;

	if (handle->trace_clock)
		return handle->trace_clock;

	/*
	 * If no clock is set on this handle, get the trace clock of
	 * the top instance in the handle's tracing dir
	 */
	if (!handle->tracing_dir) {
		handle->trace_clock = tracefs_get_clock(NULL);
		return handle->trace_clock;
	}

	/* The instance is only needed for the lookup and is freed right away */
	inst = tracefs_instance_alloc(handle->tracing_dir, NULL);
	if (!inst)
		return NULL;

	handle->trace_clock = tracefs_get_clock(inst);
	tracefs_instance_free(inst);
	return handle->trace_clock;
}

/*
 * Build the option that describes a trace buffer and add it to @handle
 * with tracecmd_add_option_v(). Returns NULL if the file has no sections
 * or an allocation fails.
 */
__hidden struct tracecmd_option *
out_add_buffer_option(struct tracecmd_output *handle, const char *name,
		      unsigned short id, unsigned long long data_offset,
		      int cpus, struct data_file_write *cpu_data, int page_size)
{
	struct tracecmd_option *option;
	int i, j = 0, k = 0;
	int *cpu_ids = NULL;
	struct iovec *vect;
	char *clock;

	if (!HAS_SECTIONS(handle))
		return NULL;

	clock = get_clock(handle);
	if (!clock) {
		tracecmd_warning("Could not find clock, set to 'local'");
		clock = "local";
	}

	/*
	 * Buffer flyrecord option:
	 *  - trace data offset in the file
	 *  - buffer name
	 *  - buffer clock
	 *  - page size
	 *  - CPU count
	 *  - for each CPU:
	 *    - CPU id
	 *    - CPU trace data offset in the file
	 *    - CPU trace data size
	 */

	/*
	 * Buffer latency option:
	 *  - trace data offset in the file
	 *  - buffer name
	 *  - buffer clock
	 */

	/*
	 * Upper bound of the number of iovec entries:
	 * 5 : offset, name, clock, page size, count
	 * 3 per CPU : CPU id, CPU data offset, CPU data size
	 */
	vect = calloc(5 + (cpus * 3), sizeof(struct iovec));
	if (!vect)
		return NULL;
	if (cpus) {
		/* The CPU ids need storage that stays valid until the option is added */
		cpu_ids = calloc(cpus, sizeof(int));
		if (!cpu_ids) {
			free(vect);
			return NULL;
		}
	}
	vect[j].iov_base = (void *) &data_offset;
	vect[j++].iov_len = 8;
	vect[j].iov_base = (void *) name;
vect[j++].iov_len = strlen(name) + 1; vect[j].iov_base = (void *) clock; vect[j++].iov_len = strlen(clock) + 1; if (id == TRACECMD_OPTION_BUFFER) { vect[j].iov_base = &page_size; vect[j++].iov_len = 4; vect[j].iov_base = (void *) &k; vect[j++].iov_len = 4; for (i = 0; i < cpus; i++) { if (!cpu_data[i].file_size) continue; cpu_ids[i] = i; vect[j].iov_base = &cpu_ids[i]; vect[j++].iov_len = 4; vect[j].iov_base = &cpu_data[i].data_offset; vect[j++].iov_len = 8; vect[j].iov_base = &cpu_data[i].write_size; vect[j++].iov_len = 8; k++; } } option = tracecmd_add_option_v(handle, id, vect, j); free(vect); free(cpu_ids); return option; } struct tracecmd_output *tracecmd_create_file_latency(const char *output_file, int cpus, int file_version, const char *compression) { enum tracecmd_section_flags flags = 0; struct tracecmd_output *handle; tsize_t offset; char *path; handle = tracecmd_output_create(output_file); if (!handle) return NULL; if (file_version && tracecmd_output_set_version(handle, file_version)) goto out_free; if (compression) { if (tracecmd_output_set_compression(handle, compression)) goto out_free; } else if (file_version >= FILE_VERSION_COMPRESSION) { tracecmd_output_set_compression(handle, "any"); } if (tracecmd_output_write_headers(handle, NULL)) goto out_free; /* * Save the command lines; */ if (tracecmd_write_cmdlines(handle) < 0) goto out_free; if (tracecmd_write_cpus(handle, cpus) < 0) goto out_free; if (tracecmd_write_buffer_info(handle) < 0) goto out_free; if (tracecmd_write_options(handle) < 0) goto out_free; if (!check_out_state(handle, TRACECMD_FILE_CPU_LATENCY)) { tracecmd_warning("Cannot write latency data into the file, unexpected state 0x%X", handle->file_state); goto out_free; } if (!HAS_SECTIONS(handle) && do_write_check(handle, "latency ", 10)) goto out_free; path = get_tracing_file(handle, "trace"); if (!path) goto out_free; offset = do_lseek(handle, 0, SEEK_CUR); if (HAS_SECTIONS(handle) && !out_add_buffer_option(handle, "", 
TRACECMD_OPTION_BUFFER_TEXT, offset, 0, NULL, getpagesize())) goto out_free; if (handle->compress) flags |= TRACECMD_SEC_FL_COMPRESS; offset = out_write_section_header(handle, TRACECMD_OPTION_BUFFER_TEXT, "buffer latency", flags, false); copy_file_compress(handle, path, NULL); if (out_update_section_header(handle, offset)) goto out_free; put_tracing_file(path); handle->file_state = TRACECMD_FILE_CPU_LATENCY; if (HAS_SECTIONS(handle)) tracecmd_write_options(handle); return handle; out_free: tracecmd_output_close(handle); return NULL; } static int save_clock(struct tracecmd_output *handle, char *clock) { unsigned long long endian8; char *str = NULL; int ret; ret = asprintf(&str, "[%s]", clock); if (ret < 0) return -1; endian8 = convert_endian_8(handle, strlen(str)); ret = do_write_check(handle, &endian8, 8); if (ret) goto out; ret = do_write_check(handle, str, strlen(str)); out: free(str); return ret; } static int update_buffer_cpu_offset_v6(struct tracecmd_output *handle, const char *name, tsize_t offset) { tsize_t b_offset; tsize_t current; if (!name) name = ""; b_offset = get_buffer_file_offset(handle, name); if (!b_offset) { tracecmd_warning("Cannot find description for buffer %s", name); return -1; } current = do_lseek(handle, 0, SEEK_CUR); /* Go to the option data, where will write the offest */ if (do_lseek(handle, b_offset, SEEK_SET) == (off_t)-1) { tracecmd_warning("could not seek to %lld", b_offset); return -1; } if (do_write_check(handle, &offset, 8)) return -1; /* Go back to end of file */ if (do_lseek(handle, current, SEEK_SET) == (off_t)-1) { tracecmd_warning("could not seek to %lld", offset); return -1; } return 0; } __hidden int out_write_emty_cpu_data(struct tracecmd_output *handle, int cpus) { unsigned long long zero = 0; char *clock; int ret; int i; if (HAS_SECTIONS(handle)) return 0; ret = handle->file_state == TRACECMD_FILE_CPU_FLYRECORD ? 
0 : check_file_state(handle->file_version, handle->file_state, TRACECMD_FILE_CPU_FLYRECORD); if (ret < 0) { tracecmd_warning("Cannot write trace data into the file, unexpected state 0x%X", handle->file_state); return ret; } if (do_write_check(handle, "flyrecord", 10)) return -1; for (i = 0; i < cpus; i++) { /* Write 0 for trace data offset and size */ if (do_write_check(handle, &zero, 8)) return -1; if (do_write_check(handle, &zero, 8)) return -1; } clock = get_clock(handle); if (clock && save_clock(handle, clock)) return -1; handle->file_state = TRACECMD_FILE_CPU_FLYRECORD; return 0; } __hidden int out_write_cpu_data(struct tracecmd_output *handle, int cpus, struct cpu_data_source *data, const char *buff_name) { struct data_file_write *data_files = NULL; enum tracecmd_section_flags flags = 0; tsize_t data_offs, offset; unsigned long long endian8; unsigned long long read_size; int page_size; char *clock; char *str; int ret; int i; /* This can be called multiple times (when recording instances) */ ret = handle->file_state == TRACECMD_FILE_CPU_FLYRECORD ? 
0 : check_file_state(handle->file_version, handle->file_state, TRACECMD_FILE_CPU_FLYRECORD); if (ret < 0) { tracecmd_warning("Cannot write trace data into the file, unexpected state 0x%X", handle->file_state); goto out_free; } if (*buff_name == '\0') page_size = handle->page_size; else page_size = get_trace_page_size(handle, buff_name); data_offs = do_lseek(handle, 0, SEEK_CUR); if (!HAS_SECTIONS(handle) && do_write_check(handle, "flyrecord", 10)) goto out_free; if (handle->compress) flags |= TRACECMD_SEC_FL_COMPRESS; if (asprintf(&str, "buffer flyrecord %s", buff_name) < 1) goto out_free; offset = out_write_section_header(handle, TRACECMD_OPTION_BUFFER, str, flags, false); free(str); if (offset == (off_t)-1) goto out_free; data_files = calloc(cpus, sizeof(*data_files)); if (!data_files) goto out_free; for (i = 0; i < cpus; i++) { data_files[i].file_size = data[i].size; /* * Place 0 for the data offset and size, and save the offsets to * updated them with the correct data later. */ if (!HAS_SECTIONS(handle)) { endian8 = 0; data_files[i].file_data_offset = do_lseek(handle, 0, SEEK_CUR); if (do_write_check(handle, &endian8, 8)) goto out_free; data_files[i].file_write_size = do_lseek(handle, 0, SEEK_CUR); if (do_write_check(handle, &endian8, 8)) goto out_free; } } if (!HAS_SECTIONS(handle)) { update_buffer_cpu_offset_v6(handle, buff_name, data_offs); clock = get_clock(handle); if (clock && save_clock(handle, clock)) goto out_free; } for (i = 0; i < cpus; i++) { data_files[i].data_offset = do_lseek(handle, 0, SEEK_CUR); /* Page align offset */ data_files[i].data_offset += page_size - 1; data_files[i].data_offset &= ~(page_size - 1); ret = do_lseek(handle, data_files[i].data_offset, SEEK_SET); if (ret == (off_t)-1) goto out_free; if (!tracecmd_get_quiet(handle)) fprintf(stderr, "CPU%d data recorded at offset=0x%llx\n", i, (unsigned long long)data_files[i].data_offset); if (data[i].size) { if (lseek(data[i].fd, data[i].offset, SEEK_SET) == (off_t)-1) goto out_free; 
read_size = out_copy_fd_compress(handle, data[i].fd, data[i].size, &data_files[i].write_size, page_size); if (read_size != data_files[i].file_size) { errno = EINVAL; tracecmd_warning("did not match size of %llu to %llu", read_size, data_files[i].file_size); goto out_free; } } else { data_files[i].write_size = 0; } if (!HAS_SECTIONS(handle)) { /* Write the real CPU data offset in the file */ if (do_lseek(handle, data_files[i].file_data_offset, SEEK_SET) == (off_t)-1) goto out_free; endian8 = convert_endian_8(handle, data_files[i].data_offset); if (do_write_check(handle, &endian8, 8)) goto out_free; /* Write the real CPU data size in the file */ if (do_lseek(handle, data_files[i].file_write_size, SEEK_SET) == (off_t)-1) goto out_free; endian8 = convert_endian_8(handle, data_files[i].write_size); if (do_write_check(handle, &endian8, 8)) goto out_free; offset = data_files[i].data_offset + data_files[i].write_size; if (do_lseek(handle, offset, SEEK_SET) == (off_t)-1) goto out_free; } if (!tracecmd_get_quiet(handle)) { fprintf(stderr, " %llu bytes in size", (unsigned long long)data_files[i].write_size); if (flags & TRACECMD_SEC_FL_COMPRESS) fprintf(stderr, " (%llu uncompressed)", (unsigned long long)data_files[i].file_size); fprintf(stderr, "\n"); } } if (HAS_SECTIONS(handle) && !out_add_buffer_option(handle, buff_name, TRACECMD_OPTION_BUFFER, data_offs, cpus, data_files, page_size)) goto out_free; free(data_files); if (do_lseek(handle, 0, SEEK_END) == (off_t)-1) return -1; if (out_update_section_header(handle, offset)) goto out_free; handle->file_state = TRACECMD_FILE_CPU_FLYRECORD; if (HAS_SECTIONS(handle)) tracecmd_write_options(handle); return 0; out_free: do_lseek(handle, 0, SEEK_END); free(data_files); return -1; } int tracecmd_write_cpu_data(struct tracecmd_output *handle, int cpus, char * const *cpu_data_files, const char *buff_name) { struct cpu_data_source *data; struct stat st; int size = 0; int ret; int i; if (!buff_name) buff_name = ""; data = calloc(cpus, 
sizeof(struct cpu_data_source)); if (!data) return -1; for (i = 0; i < cpus; i++) { ret = stat(cpu_data_files[i], &st); if (ret < 0) { tracecmd_warning("can not stat '%s'", cpu_data_files[i]); break; } data[i].fd = open(cpu_data_files[i], O_RDONLY); if (data[i].fd < 0) { tracecmd_warning("Can't read '%s'", data[i].fd); break; } data[i].size = st.st_size; data[i].offset = 0; size += st.st_size; } if (i < cpus) ret = -1; else ret = out_write_cpu_data(handle, cpus, data, buff_name); for (i--; i >= 0; i--) close(data[i].fd); free(data); return ret; } int tracecmd_append_cpu_data(struct tracecmd_output *handle, int cpus, char * const *cpu_data_files) { int ret; ret = tracecmd_write_cpus(handle, cpus); if (ret) return ret; ret = tracecmd_write_buffer_info(handle); if (ret) return ret; ret = tracecmd_write_options(handle); if (ret) return ret; return tracecmd_write_cpu_data(handle, cpus, cpu_data_files, NULL); } int tracecmd_append_buffer_cpu_data(struct tracecmd_output *handle, const char *name, int cpus, char * const *cpu_data_files) { return tracecmd_write_cpu_data(handle, cpus, cpu_data_files, name); } struct tracecmd_output *tracecmd_get_output_handle_fd(int fd) { struct tracecmd_output *handle = NULL; struct tracecmd_input *ihandle; const char *cname = NULL; const char *cver = NULL; int fd2; /* Move the file descriptor to the beginning */ if (lseek(fd, 0, SEEK_SET) == (off_t)-1) return NULL; /* dup fd to be used by the ihandle bellow */ fd2 = dup(fd); if (fd2 < 0) return NULL; /* get a input handle from this */ ihandle = tracecmd_alloc_fd(fd2, TRACECMD_FL_LOAD_NO_PLUGINS); if (!ihandle) return NULL; tracecmd_read_headers(ihandle, 0); /* move the file descriptor to the end */ if (lseek(fd, 0, SEEK_END) == (off_t)-1) goto out_free; /* create a partial output handle */ handle = calloc(1, sizeof(*handle)); if (!handle) goto out_free; handle->fd = fd; /* get tep, state, endian and page size */ handle->file_state = tracecmd_get_file_state(ihandle); /* Use the tep of the 
ihandle for later writes */ handle->pevent = tracecmd_get_tep(ihandle); tep_ref(handle->pevent); handle->page_size = tracecmd_page_size(ihandle); handle->file_version = tracecmd_get_in_file_version(ihandle); handle->options_start = get_last_option_offset(ihandle); handle->strings_offs = get_meta_strings_size(ihandle); list_head_init(&handle->options); list_head_init(&handle->buffers); if (!tracecmd_get_file_compress_proto(ihandle, &cname, &cver)) { handle->compress = tracecmd_compress_alloc(cname, cver, handle->fd, handle->pevent, handle->msg_handle); if (!handle->compress) goto out_free; } tracecmd_close(ihandle); return handle; out_free: tracecmd_close(ihandle); free(handle); return NULL; } /** * tracecmd_output_create - Create new output handle to a trace file with given name * @output_file: Name of the trace file that will be created. * * The @output_file parameter can be NULL. In this case the output handle is created * and initialized, but is not associated with a file. * * Returns pointer to created outpuy handle, or NULL in case of an error. 
*/
struct tracecmd_output *tracecmd_output_create(const char *output_file)
{
	struct tracecmd_output *out;
	int fd = -1;

	/* Without a file name the handle is created with a descriptor of -1 */
	if (output_file) {
		fd = open(output_file, O_RDWR | O_CREAT | O_TRUNC | O_LARGEFILE, 0644);
		if (fd < 0)
			return NULL;
	}
	out = tracecmd_output_create_fd(fd);
	if (!out && fd >= 0) {
		/* Do not leave the file that was just created behind */
		close(fd);
		unlink(output_file);
	}

	return out;
}

/**
 * tracecmd_copy - copy the headers of one trace.dat file for another
 * @ihandle: input handle of the trace.dat file to copy
 * @file: the trace.dat file to create
 * @state: what data will be copied from the source handle
 * @file_version: version of the output file
 * @compression: compression of the output file, can be one of:
 *		 NULL - inherit compression from the input file
 *		 "any" - compress the output file with the best available algorithm
 *		 "none" - do not compress the output file
 *		 algorithm_name - compress the output file with specified algorithm
 *
 * Reads the header information and creates a new trace data file
 * with the same characteristics (events and all) and returns
 * tracecmd_output handle to this new file.
 *
 * Returns NULL in case of an error; the file created for @file is
 * unlinked in that case.
 */
struct tracecmd_output *tracecmd_copy(struct tracecmd_input *ihandle, const char *file,
				      enum tracecmd_file_states state, int file_version,
				      const char *compression)
{
	enum tracecmd_file_states fstate;
	struct tracecmd_output *handle;

	handle = tracecmd_output_create(file);
	if (!handle)
		return NULL;

	if (tracecmd_output_set_from_input(handle, ihandle))
		goto out_free;

	/* Versions below FILE_VERSION_MIN keep the version of the handle */
	if (file_version >= FILE_VERSION_MIN)
		tracecmd_output_set_version(handle, file_version);

	if (compression && tracecmd_output_set_compression(handle, compression))
		goto out_free;

	output_write_init(handle);
	/* The headers are copied up to the CPU count at most */
	fstate = state > TRACECMD_FILE_CPU_COUNT ?
TRACECMD_FILE_CPU_COUNT : state;
	if (tracecmd_copy_headers(ihandle, handle, 0, fstate) < 0)
		goto out_free;

	if (tracecmd_copy_buffer_descr(ihandle, handle) < 0)
		goto out_free;

	/* Options and trace data are only copied if @state asks for them */
	if (state >= TRACECMD_FILE_OPTIONS &&
	    tracecmd_copy_options(ihandle, handle) < 0)
		goto out_free;

	if (state >= TRACECMD_FILE_CPU_LATENCY &&
	    tracecmd_copy_trace_data(ihandle, handle) < 0)
		goto out_free;

	if (HAS_SECTIONS(handle))
		tracecmd_write_options(handle);

	/* The file is all ready to have cpu data attached */
	return handle;

out_free:
	if (handle)
		tracecmd_output_close(handle);

	unlink(file);
	return NULL;
}

/* Set the file state of @handle to @new_state, without any validation */
__hidden void out_set_file_state(struct tracecmd_output *handle, int new_state)
{
	handle->file_state = new_state;
}

/*
 * Ask check_file_state() about a change from the current file state of
 * @handle to @new_state, for the file version of the handle.
 */
__hidden bool check_out_state(struct tracecmd_output *handle, int new_state)
{
	return check_file_state(handle->file_version, handle->file_state, new_state);
}

/* Return true if @handle has a compression context */
__hidden bool out_check_compression(struct tracecmd_output *handle)
{
	return (handle->compress != NULL);
}

/*
 * Record @start as the start of an options section.
 *
 * For files with sections, @start is written (endian converted, 8 bytes)
 * at the previously recorded handle->options_start, the file position at
 * the time of the call becomes the new handle->options_start, and the file
 * position is restored. Fails with -1 if no previous options_start is set
 * or on I/O failure.
 *
 * For files without sections, @start is only stored in the handle.
 */
__hidden int out_save_options_offset(struct tracecmd_output *handle, unsigned long long start)
{
	unsigned long long new, en8;

	if (HAS_SECTIONS(handle)) {
		/* Append to the previous options section, if any */
		if (!handle->options_start)
			return -1;

		new = do_lseek(handle, 0, SEEK_CUR);
		if (do_lseek(handle, handle->options_start, SEEK_SET) == (off_t)-1)
			return -1;

		en8 = convert_endian_8(handle, start);
		if (do_write_check(handle, &en8, 8))
			return -1;

		handle->options_start = new;
		if (do_lseek(handle, new, SEEK_SET) == (off_t)-1)
			return -1;
	} else {
		handle->options_start = start;
	}

	return 0;
}

/**
 * tracecmd_get_out_file_version - return the trace.dat file version
 * @handle: output handle for the trace.dat file
 */
unsigned long tracecmd_get_out_file_version(struct tracecmd_output *handle)
{
	return handle->file_version;
}

/**
 * tracecmd_get_out_file_offset - return the current position in the output
 * @handle: output handle for the trace.dat file
 *
 * Returns the result of a zero length relative seek on @handle.
 */
size_t tracecmd_get_out_file_offset(struct tracecmd_output *handle)
{
	return do_lseek(handle, 0, SEEK_CUR);
}
trace-cmd-v3.3.1/lib/trace-cmd/trace-perf.c000066400000000000000000000051161470231550600203520ustar00rootroot00000000000000
// SPDX-License-Identifier: LGPL-2.1
/*
 * Copyright (C) 2021, VMware, Tzvetomir Stoyanov 
 *
 */
/* NOTE(review): the names of the included system headers appear to be missing here */
#include 
#include 
#include 

#include "trace-cmd-private.h"

/*
 * Fill @pe with the default perf event attributes used by trace_perf_init():
 * the event is created disabled, samples the CPU number, excludes the
 * kernel, is inherited by children and uses a sample frequency of 1000.
 *
 * NOTE(review): type is PERF_TYPE_SOFTWARE while config is set with a
 * PERF_COUNT_HW_* constant - confirm which event is intended.
 */
static void default_perf_init_pe(struct perf_event_attr *pe)
{
	pe->type = PERF_TYPE_SOFTWARE;
	pe->sample_type = PERF_SAMPLE_CPU;
	pe->size = sizeof(struct perf_event_attr);
	pe->config = PERF_COUNT_HW_CPU_CYCLES;
	pe->disabled = 1;
	pe->exclude_kernel = 1;
	/* freq = 1: sample_freq is a frequency, not a period */
	pe->freq = 1;
	pe->sample_freq = 1000;
	pe->inherit = 1;
	pe->mmap = 1;
	pe->comm = 1;
	pe->task = 1;
	pe->precise_ip = 1;
	pe->sample_id_all = 1;
	pe->read_format = PERF_FORMAT_ID |
			PERF_FORMAT_TOTAL_TIME_ENABLED |
			PERF_FORMAT_TOTAL_TIME_RUNNING;
}

/**
 * trace_perf_init - Initialize perf context
 *
 * @perf: structure, representing perf context, that will be initialized.
 * @pages: Number of perf memory mapped pages.
 * @cpu: CPU number, associated with this perf context.
 * @pid: PID, associated with this perf context.
 *
 * The perf context is initialized with default values. The caller can set
 * custom perf parameters in perf->pe, before calling trace_perf_open() API.
 *
 * Returns 0 on success, or -1 in case of an error.
*
 */
int __hidden trace_perf_init(struct trace_perf *perf, int pages, int cpu, int pid)
{
	if (!perf)
		return -1;

	memset(perf, 0, sizeof(struct trace_perf));
	default_perf_init_pe(&perf->pe);
	perf->cpu = cpu;
	perf->pages = pages;
	perf->pid = pid;
	/* -1 marks the session as not opened, see trace_perf_close() */
	perf->fd = -1;

	return 0;
}

/**
 * trace_perf_close - Close perf session
 *
 * @perf: structure, representing context of a running perf session, opened
 *	  with trace_perf_open()
 *
 * Closes the file descriptor and removes the memory mapping, if they are
 * set. The context is left with fd = -1 and mmap = NULL.
 */
void __hidden trace_perf_close(struct trace_perf *perf)
{
	if (perf->fd >= 0)
		close(perf->fd);
	perf->fd = -1;
	/* The length must match the one used for mmap() in trace_perf_open() */
	if (perf->mmap && perf->mmap != MAP_FAILED)
		munmap(perf->mmap, (perf->pages + 1) * getpagesize());
	perf->mmap = NULL;
}

/**
 * trace_perf_open - Open perf session
 *
 * @perf: structure, representing perf context that will be opened. It must be
 *	  initialized with trace_perf_init().
 *
 * Returns 0 on success, or -1 in case of an error. In case of success, the
 * session must be closed with trace_perf_close()
 */
int __hidden trace_perf_open(struct trace_perf *perf)
{
	perf->fd = syscall(__NR_perf_event_open, &perf->pe, perf->pid, perf->cpu, -1, 0);
	if (perf->fd < 0)
		return -1;
	/* The result of fcntl() is not checked */
	fcntl(perf->fd, F_SETFL, O_NONBLOCK);

	/* perf->pages pages plus one additional page are mapped */
	perf->mmap = mmap(NULL, (perf->pages + 1) * getpagesize(),
			  PROT_READ | PROT_WRITE, MAP_SHARED, perf->fd, 0);
	if (perf->mmap == MAP_FAILED)
		goto error;

	return 0;

error:
	/* trace_perf_close() copes with the MAP_FAILED mapping */
	trace_perf_close(perf);
	return -1;
}
trace-cmd-v3.3.1/lib/trace-cmd/trace-plugin.c000066400000000000000000000161751470231550600207230ustar00rootroot00000000000000
// SPDX-License-Identifier: LGPL-2.1
/*
 * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt 
 *
 */
/* NOTE(review): the names of the included system headers appear to be missing here */
#include 
#include 
#include 
#include 
#include 
#include "trace-cmd.h"
#include "trace-local.h"
#include "trace-cmd-local.h"

/* Plugin directory, relative to the HOME directory of the user */
#define LOCAL_PLUGIN_DIR ".local/lib/trace-cmd/plugins/"

/* A loaded plugin: its file name and the handle returned by dlopen() */
struct trace_plugin_list {
	struct trace_plugin_list	*next;
	char				*name;
	void				*handle;
};

/* Context that is passed to the load and unload functions of the plugins */
struct trace_plugin_context {
	enum tracecmd_context context;
	enum tracecmd_plugin_flag flags;
	/* The same pointer, typed according to @context */
	union {
		void				*data;
		struct tracecmd_input		*trace_input;
		struct
tracecmd_output		*trace_output;
	};
};

/**
 * tracecmd_plugin_context_create - Create and initialize tracecmd plugins context.
 * @context: Context of the trace-cmd command.
 * @data: Pointer to the context specific data, which will be passed to plugins.
 *
 * Returns a pointer to created tracecmd plugins context, or NULL in case memory
 * allocation fails. The returned pointer should be freed by free ().
 */
struct trace_plugin_context *
tracecmd_plugin_context_create(enum tracecmd_context context, void *data)
{
	struct trace_plugin_context *trace;

	/* calloc() leaves the flags cleared */
	trace = calloc(1, sizeof(struct trace_plugin_context));
	if (!trace)
		return NULL;
	trace->context = context;
	trace->data = data;
	return trace;
}

/**
 * tracecmd_plugin_set_flag - Set a flag to tracecmd plugins context.
 * @context: Context of the trace-cmd command.
 * @flag: Flag, which will be set.
 *
 * The flag is added to the flags that are already set. A NULL @context
 * is ignored.
 */
void tracecmd_plugin_set_flag(struct trace_plugin_context *context,
			      enum tracecmd_plugin_flag flag)
{
	if (context)
		context->flags |= flag;
}

/**
 * tracecmd_plugin_context_input - Get a tracecmd_input plugin context.
 * @context: Context of the trace-cmd command.
 *
 * Returns pointer to tracecmd_input, if such context is available or
 * NULL otherwise.
 */
struct tracecmd_input *
tracecmd_plugin_context_input(struct trace_plugin_context *context)
{
	if (!context || context->context != TRACECMD_INPUT)
		return NULL;
	return context->trace_input;
}

/**
 * tracecmd_plugin_context_output - Get a tracecmd_output plugin context
 * @context: Context of the trace-cmd command.
 *
 * Returns pointer to tracecmd_output, if such context is available or
 * NULL otherwise.
*/ struct tracecmd_output * tracecmd_plugin_context_output(struct trace_plugin_context *context) { if (!context || context->context != TRACECMD_OUTPUT) return NULL; return context->trace_output; } static void load_plugin(struct trace_plugin_context *trace, const char *path, const char *file, void *data) { struct trace_plugin_list **plugin_list = data; tracecmd_plugin_load_func func; struct trace_plugin_list *list; const char *alias; char *plugin; void *handle; int ret; ret = asprintf(&plugin, "%s/%s", path, file); if (ret < 0) { tracecmd_warning("could not allocate plugin memory"); return; } handle = dlopen(plugin, RTLD_NOW | RTLD_GLOBAL); if (!handle) { tracecmd_warning("could not load plugin '%s'\n%s", plugin, dlerror()); goto out_free; } alias = dlsym(handle, TRACECMD_PLUGIN_ALIAS_NAME); if (!alias) alias = file; func = dlsym(handle, TRACECMD_PLUGIN_LOADER_NAME); if (!func) { tracecmd_warning("could not find func '%s' in plugin '%s'\n%s", TRACECMD_PLUGIN_LOADER_NAME, plugin, dlerror()); goto out_close; } list = malloc(sizeof(*list)); if (!list) { tracecmd_warning("could not allocate plugin memory"); goto out_close; } list->next = *plugin_list; list->handle = handle; list->name = plugin; *plugin_list = list; tracecmd_info("registering plugin: %s", plugin); func(trace); return; out_close: dlclose(handle); out_free: free(plugin); } static void load_plugins_dir(struct trace_plugin_context *trace, const char *suffix, const char *path, void (*load_plugin)(struct trace_plugin_context *trace, const char *path, const char *name, void *data), void *data) { struct dirent *dent; struct stat st; DIR *dir; int ret; ret = stat(path, &st); if (ret < 0) return; if (!S_ISDIR(st.st_mode)) return; dir = opendir(path); if (!dir) return; while ((dent = readdir(dir))) { const char *name = dent->d_name; if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0) continue; /* Only load plugins that end in suffix */ if (strcmp(name + (strlen(name) - strlen(suffix)), suffix) != 0) continue; 
load_plugin(trace, path, name, data); } closedir(dir); } static char *get_source_plugins_dir(void) { char *p, path[PATH_MAX+1]; int ret; ret = readlink("/proc/self/exe", path, PATH_MAX); if (ret > PATH_MAX || ret < 0) return NULL; path[ret] = 0; dirname(path); p = strrchr(path, '/'); if (!p) return NULL; /* Check if we are in the the source tree */ if (strcmp(p, "/tracecmd") != 0) return NULL; strcpy(p, "/lib/trace-cmd/plugins"); return strdup(path); } static void load_plugins_hook(struct trace_plugin_context *trace, const char *suffix, void (*load_plugin)(struct trace_plugin_context *trace, const char *path, const char *name, void *data), void *data) { char *home; char *path; char *envdir; int ret; if (trace && trace->flags & TRACECMD_DISABLE_PLUGINS) return; /* * If a system plugin directory was defined, * check that first. */ #ifdef PLUGIN_TRACECMD_DIR if (!trace || !(trace->flags & TRACECMD_DISABLE_SYS_PLUGINS)) load_plugins_dir(trace, suffix, PLUGIN_TRACECMD_DIR, load_plugin, data); #endif /* * Next let the environment-set plugin directory * override the system defaults. */ envdir = getenv("TRACECMD_PLUGIN_DIR"); if (envdir) load_plugins_dir(trace, suffix, envdir, load_plugin, data); /* * Now let the home directory override the environment * or system defaults. */ home = getenv("HOME"); if (!home) return; ret = asprintf(&path, "%s/%s", home, LOCAL_PLUGIN_DIR); if (ret < 0) { tracecmd_warning("could not allocate plugin memory"); return; } load_plugins_dir(trace, suffix, path, load_plugin, data); free(path); path = get_source_plugins_dir(); if (path) { load_plugins_dir(trace, suffix, path, load_plugin, data); free(path); } } /** * tracecmd_load_plugins - Load trace-cmd specific plugins. * @context: Context of the trace-cmd command, will be passed to the plugins * at load time. 
*
 * Returns a list of loaded plugins
 */
struct trace_plugin_list*
tracecmd_load_plugins(struct trace_plugin_context *trace)
{
	struct trace_plugin_list *list = NULL;

	/* load_plugin() adds every plugin it loads to the list */
	load_plugins_hook(trace, ".so", load_plugin, &list);
	return list;
}

/**
 * tracecmd_unload_plugins - Unload trace-cmd specific plugins.
 * @plugin_list: List of plugins, previously loaded with tracecmd_load_plugins.
 * @trace: Context of the trace-cmd command, will be passed to the plugins
 *	   at unload time.
 *
 * Every entry of the list is freed, the list must not be used afterwards.
 */
void
tracecmd_unload_plugins(struct trace_plugin_list *plugin_list,
			struct trace_plugin_context *trace)
{
	tracecmd_plugin_unload_func func;
	struct trace_plugin_list *list;

	while (plugin_list) {
		list = plugin_list;
		plugin_list = list->next;
		/* The unload function is optional */
		func = dlsym(list->handle, TRACECMD_PLUGIN_UNLOADER_NAME);
		if (func)
			func(trace);
		dlclose(list->handle);
		free(list->name);
		free(list);
	}
}
trace-cmd-v3.3.1/lib/trace-cmd/trace-rbtree.c000066400000000000000000000226521470231550600207050ustar00rootroot00000000000000
// SPDX-License-Identifier: LGPL-2.1
/*
 * Copyright (C) 2023 Google, Steven Rostedt 
 *
 */
/* NOTE(review): the names of the included system headers appear to be missing here */
#include 
#include 
#include "trace-local.h"
#include "trace-rbtree.h"

/* Node colors */
enum {
	RED,
	BLACK,
};

/*
 * Initialize @tree as an empty tree that uses @cmp_fn to order nodes on
 * insert and @search_fn to look up nodes in trace_rbtree_find().
 */
void __hidden trace_rbtree_init(struct trace_rbtree *tree,
				trace_rbtree_cmp_fn cmp_fn,
				trace_rbtree_search_fn search_fn)
{
	memset(tree, 0, sizeof(*tree));
	tree->search = search_fn;
	tree->cmp = cmp_fn;
}

/* True if @node is the left child of its parent. @node must have a parent. */
static bool is_left(struct trace_rbtree_node *node)
{
	return node == node->parent->left;
}

/*
 * Return the location of the pointer that points to @node: the root
 * pointer of @tree, or the left or right pointer of the parent.
 */
static struct trace_rbtree_node **get_parent_ptr(struct trace_rbtree *tree,
						 struct trace_rbtree_node *node)
{
	if (!node->parent)
		return &tree->node;
	else if (is_left(node))
		return &node->parent->left;
	else
		return &node->parent->right;
}

/*
 * Rotate left around @node: the right child of @node takes the place of
 * @node, and @node becomes the left child of it.
 */
static void rotate_left(struct trace_rbtree *tree,
			struct trace_rbtree_node *node)
{
	struct trace_rbtree_node **parent_ptr = get_parent_ptr(tree, node);
	struct trace_rbtree_node *parent = node->parent;
	struct trace_rbtree_node *old_right = node->right;

	*parent_ptr = old_right;
	node->right =
old_right->left;
	old_right->left = node;

	/* The subtree that moved over to @node needs its parent updated */
	if (node->right)
		node->right->parent = node;
	node->parent = old_right;
	old_right->parent = parent;
}

/*
 * Rotate right around @node: the left child of @node takes the place of
 * @node, and @node becomes the right child of it.
 */
static void rotate_right(struct trace_rbtree *tree,
			 struct trace_rbtree_node *node)
{
	struct trace_rbtree_node **parent_ptr = get_parent_ptr(tree, node);
	struct trace_rbtree_node *parent = node->parent;
	struct trace_rbtree_node *old_left = node->left;

	*parent_ptr = old_left;
	node->left = old_left->right;
	old_left->right = node;

	/* The subtree that moved over to @node needs its parent updated */
	if (node->left)
		node->left->parent = node;
	node->parent = old_left;
	old_left->parent = parent;
}

/*
 * Link @node into @tree as a leaf, without any rebalancing. The walk goes
 * right while tree->cmp(existing, @node) is positive and left otherwise.
 */
static void insert_tree(struct trace_rbtree *tree,
			struct trace_rbtree_node *node)
{
	struct trace_rbtree_node *next = tree->node;
	struct trace_rbtree_node *last_next = NULL;
	bool went_left = false;

	while (next) {
		last_next = next;
		if (tree->cmp(next, node) > 0) {
			next = next->right;
			went_left = false;
		} else {
			next = next->left;
			went_left = true;
		}
	}

	/* Empty tree, @node becomes the root */
	if (!last_next) {
		tree->node = node;
		return;
	}

	if (went_left)
		last_next->left = node;
	else
		last_next->right = node;

	node->parent = last_next;
}

/* Debugging helpers that verify the links of the tree, compiled out */
#if 0
static int check_node(struct trace_rbtree *tree, struct trace_rbtree_node *node)
{
	if (!node->parent) {
		if (tree->node != node)
			goto fail;
	} else {
		if (!is_left(node)) {
			if (node->parent->right != node)
				goto fail;
		}
	}
	return 0;
fail:
	printf("FAILED ON NODE!");
	breakpoint();
	return -1;
}

static void check_tree(struct trace_rbtree *tree)
{
	struct trace_rbtree_node *node = tree->node;

	if (node) {
		if (check_node(tree, node))
			return;
		while (node->left) {
			node = node->left;
			if (check_node(tree, node))
				return;
		}
	}

	while (node) {
		if (check_node(tree, node))
			return;
		if (node->right) {
			node = node->right;
			if (check_node(tree, node))
				return;
			while (node->left) {
				node = node->left;
				if (check_node(tree, node))
					return;
			}
			continue;
		}
		while (node->parent) {
			if (is_left(node))
				break;
			node = node->parent;
			if (check_node(tree, node))
				return;
		}
		node = node->parent;
	}
}
#else
static inline void check_tree(struct trace_rbtree *tree)
{
} #endif int __hidden trace_rbtree_insert(struct trace_rbtree *tree, struct trace_rbtree_node *node) { struct trace_rbtree_node *uncle; memset(node, 0, sizeof(*node)); insert_tree(tree, node); node->color = RED; while (node && node->parent && node->parent->color == RED) { if (is_left(node->parent)) { uncle = node->parent->parent->right; if (uncle && uncle->color == RED) { node->parent->color = BLACK; uncle->color = BLACK; node->parent->parent->color = RED; node = node->parent->parent; } else { if (!is_left(node)) { node = node->parent; rotate_left(tree, node); check_tree(tree); } node->parent->color = BLACK; node->parent->parent->color = RED; rotate_right(tree, node->parent->parent); check_tree(tree); } } else { uncle = node->parent->parent->left; if (uncle && uncle->color == RED) { node->parent->color = BLACK; uncle->color = BLACK; node->parent->parent->color = RED; node = node->parent->parent; } else { if (is_left(node)) { node = node->parent; rotate_right(tree, node); check_tree(tree); } node->parent->color = BLACK; node->parent->parent->color = RED; rotate_left(tree, node->parent->parent); check_tree(tree); } } } check_tree(tree); tree->node->color = BLACK; tree->nr_nodes++; return 0; } struct trace_rbtree_node *trace_rbtree_find(struct trace_rbtree *tree, const void *data) { struct trace_rbtree_node *node = tree->node; int ret; while (node) { ret = tree->search(node, data); if (!ret) return node; if (ret > 0) node = node->right; else node = node->left; } return NULL; } static struct trace_rbtree_node *next_node(struct trace_rbtree_node *node) { if (node->right) { node = node->right; while (node->left) node = node->left; return node; } while (node->parent && !is_left(node)) node = node->parent; return node->parent; } static void tree_fixup(struct trace_rbtree *tree, struct trace_rbtree_node *node) { while (node->parent && node->color == BLACK) { if (is_left(node)) { struct trace_rbtree_node *old_right = node->parent->right; if (old_right->color == RED) { 
old_right->color = BLACK; node->parent->color = RED; rotate_left(tree, node->parent); old_right = node->parent->right; } if (old_right->left->color == BLACK && old_right->right->color == BLACK) { old_right->color = RED; node = node->parent; } else { if (old_right->right->color == BLACK) { old_right->left->color = BLACK; old_right->color = RED; rotate_right(tree, old_right); old_right = node->parent->right; } old_right->color = node->parent->color; node->parent->color = BLACK; old_right->right->color = BLACK; rotate_left(tree, node->parent); node = tree->node; } } else { struct trace_rbtree_node *old_left = node->parent->left; if (old_left->color == RED) { old_left->color = BLACK; node->parent->color = RED; rotate_right(tree, node->parent); old_left = node->parent->left; } if (old_left->right->color == BLACK && old_left->left->color == BLACK) { old_left->color = RED; node = node->parent; } else { if (old_left->left->color == BLACK) { old_left->right->color = BLACK; old_left->color = RED; rotate_left(tree, old_left); old_left = node->parent->left; } old_left->color = node->parent->color; node->parent->color = BLACK; old_left->left->color = BLACK; rotate_right(tree, node->parent); node = tree->node; } } } node->color = BLACK; } void trace_rbtree_delete(struct trace_rbtree *tree, struct trace_rbtree_node *node) { struct trace_rbtree_node *x, *y; bool do_fixup = false; if (!node->left && !node->right && !node->parent) { tree->node = NULL; goto out; } if (!node->left || !node->right) y = node; else y = next_node(node); if (y->left) x = y->left; else x = y->right; if (x) x->parent = y->parent; if (!y->parent) { tree->node = x; } else { if (is_left(y)) y->parent->left = x; else y->parent->right = x; } do_fixup = y->color == BLACK; if (y != node) { y->color = node->color; y->parent = node->parent; y->left = node->left; y->right = node->right; if (y->left) y->left->parent = y; if (y->right) y->right->parent = y; if (!y->parent) { tree->node = y; } else { if (is_left(node)) 
y->parent->left = y; else y->parent->right = y; } } if (do_fixup) tree_fixup(tree, x); out: node->parent = node->left = node->right = NULL; tree->nr_nodes--; check_tree(tree); } __hidden struct trace_rbtree_node *trace_rbtree_next(struct trace_rbtree *tree, struct trace_rbtree_node *node) { check_tree(tree); /* * When either starting or the previous iteration returned a * node with a right branch, then go to the first node (if starting) * or the right node, and then return the left most node. */ if (!node || node->right) { if (!node) node = tree->node; else node = node->right; while (node && node->left) node = node->left; return node; } /* * If we are here, then the previous iteration returned the * left most node of the tree or the right branch. If this * is a left node, then simply return the parent. If this * is a right node, then keep going up until its a left node, * or we finished the iteration. * * If we are here and are the top node, then there is no right * node, and this is finished (return NULL). */ if (!node->parent || is_left(node)) return node->parent; do { node = node->parent; } while (node->parent && !is_left(node)); return node->parent; } /* * Used for freeing a tree, just quickly pop off the children in * no particular order. This will corrupt the tree! That is, * do not do any inserting or deleting of this tree after calling * this function. 
*/ struct trace_rbtree_node *trace_rbtree_pop_nobalance(struct trace_rbtree *tree) { struct trace_rbtree_node *node = tree->node; if (!node) return NULL; while (node->left) node = node->left; while (node->right) node = node->right; if (node->parent) { if (is_left(node)) node->parent->left = NULL; else node->parent->right = NULL; } else { tree->node = NULL; } return node; } trace-cmd-v3.3.1/lib/trace-cmd/trace-recorder.c000066400000000000000000000241511470231550600212230ustar00rootroot00000000000000// SPDX-License-Identifier: LGPL-2.1 /* * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt * */ #include #include #include #include #include #include #include #include "tracefs.h" #include "trace-cmd-private.h" #include "trace-cmd-local.h" #include "event-utils.h" /* F_GETPIPE_SZ was introduced in 2.6.35, older systems don't have it */ #ifndef F_GETPIPE_SZ # define F_GETPIPE_SZ 1032 /* The Linux number for the option */ #endif #ifndef SPLICE_F_MOVE # define SPLICE_F_MOVE 1 # define SPLICE_F_NONBLOCK 2 # define SPLICE_F_MORE 4 # define SPLICE_F_GIFT 8 #endif #define POLL_TIMEOUT_MS 1000 struct tracecmd_recorder { struct tracefs_cpu *tcpu; int fd; int fd1; int fd2; int page_size; int subbuf_size; int cpu; int stop; int max; int pages; int count; unsigned flags; }; static int append_file(int size, int dst, int src) { char buf[size]; int r; lseek(src, 0, SEEK_SET); /* If there's an error, then we are pretty much screwed :-p */ do { r = read(src, buf, size); if (r < 0) return r; r = write(dst, buf, r); if (r < 0) return r; } while (r); return 0; } void tracecmd_free_recorder(struct tracecmd_recorder *recorder) { if (!recorder) return; if (recorder->max) { /* Need to put everything into fd1 */ if (recorder->fd == recorder->fd1) { int ret; /* * Crap, the older data is in fd2, and we need * to append fd1 onto it, and then copy over to fd1 */ ret = append_file(recorder->page_size, recorder->fd2, recorder->fd1); /* Error on copying, then just keep fd1 */ if (ret) { 
lseek(recorder->fd1, 0, SEEK_END); goto close; } lseek(recorder->fd1, 0, SEEK_SET); ftruncate(recorder->fd1, 0); } append_file(recorder->page_size, recorder->fd1, recorder->fd2); } close: tracefs_cpu_close(recorder->tcpu); if (recorder->fd1 >= 0) close(recorder->fd1); if (recorder->fd2 >= 0) close(recorder->fd2); free(recorder); } static int set_nonblock(struct tracecmd_recorder *recorder) { return tracefs_cpu_stop(recorder->tcpu); } static struct tracecmd_recorder * create_buffer_recorder_fd2(int fd, int fd2, int cpu, unsigned flags, struct tracefs_instance *instance, int maxkb, int tfd) { struct tracecmd_recorder *recorder; bool nonblock = false; recorder = malloc(sizeof(*recorder)); if (!recorder) return NULL; recorder->flags = flags; recorder->page_size = getpagesize(); if (maxkb) { int kb_per_page = recorder->page_size >> 10; if (!kb_per_page) kb_per_page = 1; recorder->max = maxkb / kb_per_page; /* keep max half */ recorder->max >>= 1; if (!recorder->max) recorder->max = 1; } else recorder->max = 0; recorder->count = 0; recorder->pages = 0; /* fd always points to what to write to */ recorder->fd = fd; recorder->fd1 = fd; recorder->fd2 = fd2; if (recorder->flags & TRACECMD_RECORD_POLL) nonblock = true; if (tfd >= 0) recorder->tcpu = tracefs_cpu_alloc_fd(tfd, recorder->page_size, nonblock); else recorder->tcpu = tracefs_cpu_open(instance, cpu, nonblock); if (!recorder->tcpu) goto out_free; recorder->subbuf_size = tracefs_cpu_read_size(recorder->tcpu); return recorder; out_free: tracecmd_free_recorder(recorder); return NULL; } struct tracecmd_recorder * tracecmd_create_buffer_recorder_fd2(int fd, int fd2, int cpu, unsigned flags, struct tracefs_instance *instance, int maxkb) { return create_buffer_recorder_fd2(fd, fd2, cpu, flags, instance, maxkb, -1); } struct tracecmd_recorder * tracecmd_create_buffer_recorder_fd(int fd, int cpu, unsigned flags, struct tracefs_instance *instance) { return tracecmd_create_buffer_recorder_fd2(fd, -1, cpu, flags, instance, 0); } 
static struct tracecmd_recorder * __tracecmd_create_buffer_recorder(const char *file, int cpu, unsigned flags, struct tracefs_instance *instance, int tfd, int maxkb) { struct tracecmd_recorder *recorder; int fd, fd2 = -1; char *file2; fd = open(file, O_RDWR | O_CREAT | O_TRUNC | O_LARGEFILE, 0644); if (fd < 0) return NULL; if (maxkb) { int len = strlen(file); file2 = malloc(len + 3); if (!file2) return NULL; sprintf(file2, "%s.1", file); fd2 = open(file2, O_RDWR | O_CREAT | O_TRUNC | O_LARGEFILE, 0644); if (fd2 < 0) { close(fd); unlink(file); free(file2); return NULL; } } recorder = create_buffer_recorder_fd2(fd, fd2, cpu, flags, instance, maxkb, tfd); if (!recorder) { close(fd); unlink(file); if (fd2 != -1) close(fd2); } if (fd2 != -1) { /* Unlink file2, we need to add everything to file at the end */ unlink(file2); free(file2); } return recorder; } struct tracecmd_recorder * tracecmd_create_buffer_recorder_maxkb(const char *file, int cpu, unsigned flags, struct tracefs_instance *instance, int maxkb) { struct tracecmd_recorder *recorder = NULL; char *file2; int len; int fd; int fd2; if (!maxkb) return tracecmd_create_buffer_recorder(file, cpu, flags, instance); len = strlen(file); file2 = malloc(len + 3); if (!file2) return NULL; sprintf(file2, "%s.1", file); fd = open(file, O_RDWR | O_CREAT | O_TRUNC | O_LARGEFILE, 0644); if (fd < 0) goto out; fd2 = open(file2, O_RDWR | O_CREAT | O_TRUNC | O_LARGEFILE, 0644); if (fd2 < 0) goto err; recorder = tracecmd_create_buffer_recorder_fd2(fd, fd2, cpu, flags, instance, maxkb); if (!recorder) goto err2; out: /* Unlink file2, we need to add everything to file at the end */ unlink(file2); free(file2); return recorder; err2: close(fd2); err: close(fd); unlink(file); goto out; } struct tracecmd_recorder * tracecmd_create_buffer_recorder(const char *file, int cpu, unsigned flags, struct tracefs_instance *instance) { return __tracecmd_create_buffer_recorder(file, cpu, flags, instance, -1, 0); } /** * tracecmd_create_recorder_virt 
- Create a recorder reading tracing data * from the trace_fd file descriptor instead of from the local tracefs * @file: output filename where tracing data will be written * @cpu: which CPU is being traced * @flags: flags configuring the recorder (see TRACECMD_RECORDER_* enums) * @trace_fd: file descriptor from where tracing data will be read */ struct tracecmd_recorder * tracecmd_create_recorder_virt(const char *file, int cpu, unsigned flags, int trace_fd, int maxkb) { return __tracecmd_create_buffer_recorder(file, cpu, flags, NULL, trace_fd, maxkb); } struct tracecmd_recorder *tracecmd_create_recorder_fd(int fd, int cpu, unsigned flags) { return tracecmd_create_buffer_recorder_fd(fd, cpu, flags, NULL); } struct tracecmd_recorder *tracecmd_create_recorder(const char *file, int cpu, unsigned flags) { return tracecmd_create_buffer_recorder(file, cpu, flags, NULL); } struct tracecmd_recorder * tracecmd_create_recorder_maxkb(const char *file, int cpu, unsigned flags, int maxkb) { return tracecmd_create_buffer_recorder_maxkb(file, cpu, flags, NULL, maxkb); } static inline void update_fd(struct tracecmd_recorder *recorder, int size) { int fd; if (!recorder->max) return; recorder->count += size; if (recorder->count >= recorder->page_size) { recorder->pages += recorder->count / recorder->page_size; recorder->count = 0; } if (recorder->pages < recorder->max) return; recorder->pages = 0; fd = recorder->fd; /* Swap fd to next file. */ if (fd == recorder->fd1) fd = recorder->fd2; else fd = recorder->fd1; /* Zero out the new file we are writing to */ lseek(fd, 0, SEEK_SET); ftruncate(fd, 0); recorder->fd = fd; } /* * Returns -1 on error. * or bytes of data read. 
*/ static long read_data(struct tracecmd_recorder *recorder) { bool nonblock = recorder->stop; char buf[recorder->subbuf_size]; long left; long r, w; r = tracefs_cpu_read(recorder->tcpu, buf, nonblock); if (r < 0) return r; left = r; do { w = write(recorder->fd, buf + (r - left), left); if (w > 0) { left -= w; update_fd(recorder, w); } } while (w >= 0 && left); if (w < 0) r = w; return r; } /* * Returns -1 on error. * or bytes of data read. */ static long direct_splice_data(struct tracecmd_recorder *recorder) { bool nonblock = recorder->stop; return tracefs_cpu_pipe(recorder->tcpu, recorder->fd, nonblock); } static long move_data(struct tracecmd_recorder *recorder) { bool nonblock = recorder->stop; long ret; if (recorder->flags & TRACECMD_RECORD_NOSPLICE) return read_data(recorder); if (recorder->flags & TRACECMD_RECORD_NOBRASS) return direct_splice_data(recorder); ret = tracefs_cpu_write(recorder->tcpu, recorder->fd, nonblock); if (ret > 0) update_fd(recorder, ret); return ret; } long tracecmd_flush_recording(struct tracecmd_recorder *recorder, bool finish) { char buf[recorder->subbuf_size]; long total = 0; long wrote = 0; long ret; if (!recorder) return 0; if (!finish) return tracefs_cpu_flush_write(recorder->tcpu, recorder->fd); set_nonblock(recorder); do { ret = tracefs_cpu_flush_write(recorder->tcpu, recorder->fd); if (ret > 0) wrote += ret; } while (ret > 0); /* Make sure we finish off with a page size boundary */ wrote &= recorder->subbuf_size - 1; if (wrote) { memset(buf, 0, recorder->subbuf_size); write(recorder->fd, buf, recorder->subbuf_size - wrote); total += recorder->subbuf_size; } return total; } int tracecmd_start_recording(struct tracecmd_recorder *recorder, unsigned long sleep) { struct timespec req = { .tv_sec = sleep / 1000000, .tv_nsec = (sleep % 1000000) * 1000, }; long read = 1; long ret; recorder->stop = 0; do { /* Only sleep if we did not read anything last time */ if (!read && sleep) nanosleep(&req, NULL); read = 0; do { ret = 
move_data(recorder); if (ret < 0) { if (errno == EINTR) continue; if ((recorder->flags & TRACECMD_RECORD_POLL) && errno == EAGAIN) continue; return ret; } read += ret; } while (ret > 0 && !recorder->stop); } while (!recorder->stop); /* Flush out the rest */ ret = tracecmd_flush_recording(recorder, true); if (ret < 0) return ret; return 0; } int tracecmd_stop_recording(struct tracecmd_recorder *recorder) { if (!recorder) return -1; recorder->stop = 1; return set_nonblock(recorder); } trace-cmd-v3.3.1/lib/trace-cmd/trace-timesync-kvm.c000066400000000000000000000343761470231550600220560ustar00rootroot00000000000000// SPDX-License-Identifier: LGPL-2.1 /* * Copyright (C) 2020, VMware, Tzvetomir Stoyanov tz.stoyanov@gmail.com> * */ #include #include #include #include #include #include #include #include "trace-cmd.h" #include "trace-cmd-private.h" #include "tracefs.h" #include "trace-tsync-local.h" #define KVM_DEBUG_OFFSET_FILE "tsc-offset" #define KVM_DEBUG_SCALING_FILE "tsc-scaling-ratio" #define KVM_DEBUG_FRACTION_FILE "tsc-scaling-ratio-frac-bits" #define KVM_DEBUG_VCPU_DIR "vcpu" /* default KVM scaling values, taken from the Linux kernel */ #define KVM_SCALING_AMD_DEFAULT (1ULL<<32) #define KVM_SCALING_INTEL_DEFAULT (1ULL<<48) #define KVM_SYNC_PKT_REQUEST 1 #define KVM_SYNC_PKT_RESPONSE 2 typedef __s64 s64; #define KVM_ACCURACY 0 #define KVM_NAME "kvm" struct kvm_clock_files { int vcpu; char *offsets; char *scalings; char *frac; }; struct kvm_clock_sync { int vcpu_count; int marker_fd; struct kvm_clock_files *clock_files; struct tep_handle *tep; int raw_id; unsigned long long ts; }; struct kvm_clock_offset_msg { s64 ts; s64 offset; s64 scaling; s64 frac; }; static int read_ll_from_file(char *file, long long *res) { char buf[32]; int ret; int fd; if (!file) return -1; fd = open(file, O_RDONLY | O_NONBLOCK); if (fd < 0) return -1; ret = read(fd, buf, 32); close(fd); if (ret <= 0) return -1; *res = strtoll(buf, NULL, 0); return 0; } /* * Returns true if both scaling and 
fraction exist or both do * not exist. false if one exists without the other or if there * is a memory error. */ static bool kvm_scaling_check_vm_cpu(char *vname, char *cpu) { bool has_scaling = false; bool has_frac = false; struct stat st; char *path; int ret; if (asprintf(&path, "%s/%s/%s", vname, cpu, KVM_DEBUG_SCALING_FILE) < 0) return false; ret = stat(path, &st); free(path); if (!ret) has_scaling = true; if (asprintf(&path, "%s/%s/%s", vname, cpu, KVM_DEBUG_FRACTION_FILE) < 0) return false; ret = stat(path, &st); free(path); if (!ret) has_frac = true; if (has_scaling != has_frac) return false; return true; } static const char *kvm_debug_dir(void) { const char *debugfs; static char *kvm_dir; if (kvm_dir) return kvm_dir; debugfs = tracefs_debug_dir(); if (!debugfs) return NULL; if (asprintf(&kvm_dir, "%s/kvm", debugfs) < 0) return NULL; return kvm_dir; } /* * Returns true if a VCPU exists with a tsc-offset file and that * the scaling files for ratio and fraction both exist or both * do not exist. False if there is no VM with a tsc-offset or * there is only one of the two scaling files, or there's a * memory issue. */ static bool kvm_scaling_check_vm(char *name) { struct dirent *entry; const char *kvm; char *vdir; DIR *dir; bool valid = false; kvm = kvm_debug_dir(); if (!kvm) return false; if (asprintf(&vdir, "%s/%s", kvm, name) < 0) return false; dir = opendir(vdir); if (!dir) { free(vdir); return false; } while ((entry = readdir(dir))) { if (entry->d_type == DT_DIR && !strncmp(entry->d_name, "vcpu", 4)) { if (!kvm_scaling_check_vm_cpu(vdir, entry->d_name)) break; valid = true; } } closedir(dir); free(vdir); return valid && entry == NULL; } /* * Returns true if all VMs have a tsc-offset file and that * the scaling files for ratio and fraction both exist or both * do not exist. False if a VM with a tsc-offset or there is only * one of the two scaling files, or no VM exists or there's a memory issue. 
*/ static bool kvm_scaling_check(void) { struct dirent *entry; const char *kvm; DIR *dir; bool valid = false; kvm = kvm_debug_dir(); if (!kvm) return false; dir = opendir(kvm); if (!dir) return true; while ((entry = readdir(dir))) { if (entry->d_type == DT_DIR && isdigit(entry->d_name[0])) { if (!kvm_scaling_check_vm(entry->d_name)) break; valid = true; } } closedir(dir); return valid && entry == NULL; } static bool kvm_support_check(bool guest) { const char *kvm; /* The kvm files are only in the host so we can ignore guests */ if (guest) return true; kvm = kvm_debug_dir(); if (!kvm) return false; return kvm_scaling_check(); } static int kvm_open_vcpu_dir(struct kvm_clock_sync *kvm, int i, char *dir_str) { struct dirent *entry; char path[PATH_MAX]; DIR *dir; dir = opendir(dir_str); if (!dir) goto error; while ((entry = readdir(dir))) { if (entry->d_type != DT_DIR) { if (!strcmp(entry->d_name, KVM_DEBUG_OFFSET_FILE)) { snprintf(path, sizeof(path), "%s/%s", dir_str, entry->d_name); kvm->clock_files[i].offsets = strdup(path); } if (!strcmp(entry->d_name, KVM_DEBUG_SCALING_FILE)) { snprintf(path, sizeof(path), "%s/%s", dir_str, entry->d_name); kvm->clock_files[i].scalings = strdup(path); } if (!strcmp(entry->d_name, KVM_DEBUG_FRACTION_FILE)) { snprintf(path, sizeof(path), "%s/%s", dir_str, entry->d_name); kvm->clock_files[i].frac = strdup(path); } } } if (!kvm->clock_files[i].offsets) goto error; closedir(dir); return 0; error: if (dir) closedir(dir); free(kvm->clock_files[i].offsets); kvm->clock_files[i].offsets = NULL; free(kvm->clock_files[i].scalings); kvm->clock_files[i].scalings = NULL; free(kvm->clock_files[i].frac); kvm->clock_files[i].frac = NULL; return -1; } static int cmp_clock(const void *A, const void *B) { const struct kvm_clock_files *a = A; const struct kvm_clock_files *b = B; if (a->vcpu < b->vcpu) return -1; return a->vcpu > b->vcpu; } static int kvm_open_debug_files(struct kvm_clock_sync *kvm, int pid) { char *vm_dir_str = NULL; struct dirent 
*entry; char *pid_str = NULL; char path[PATH_MAX]; long vcpu; DIR *dir; int i; dir = opendir(kvm_debug_dir()); if (!dir) goto error; if (asprintf(&pid_str, "%d-", pid) <= 0) goto error; while ((entry = readdir(dir))) { if (!(entry->d_type == DT_DIR && !strncmp(entry->d_name, pid_str, strlen(pid_str)))) continue; asprintf(&vm_dir_str, "%s/%s", kvm_debug_dir(), entry->d_name); break; } closedir(dir); dir = NULL; if (!vm_dir_str) goto error; dir = opendir(vm_dir_str); if (!dir) goto error; i = 0; while ((entry = readdir(dir))) { if (!(entry->d_type == DT_DIR && !strncmp(entry->d_name, KVM_DEBUG_VCPU_DIR, strlen(KVM_DEBUG_VCPU_DIR)))) continue; if (i == kvm->vcpu_count) goto error; vcpu = strtol(entry->d_name + strlen(KVM_DEBUG_VCPU_DIR), NULL, 10); kvm->clock_files[i].vcpu = vcpu; snprintf(path, sizeof(path), "%s/%s", vm_dir_str, entry->d_name); if (kvm_open_vcpu_dir(kvm, i, path) < 0) goto error; i++; } if (i < kvm->vcpu_count) goto error; qsort(kvm->clock_files, kvm->vcpu_count, sizeof(*kvm->clock_files), cmp_clock); closedir(dir); free(pid_str); free(vm_dir_str); return 0; error: free(pid_str); free(vm_dir_str); if (dir) closedir(dir); return -1; } static int kvm_clock_sync_init_host(struct tracecmd_time_sync *tsync, struct kvm_clock_sync *kvm) { kvm->vcpu_count = tsync->vcpu_count; kvm->clock_files = calloc(kvm->vcpu_count, sizeof(*kvm->clock_files)); if (!kvm->clock_files) goto error; if (kvm_open_debug_files(kvm, tsync->guest_pid) < 0) goto error; return 0; error: free(kvm->clock_files); return -1; } static int kvm_clock_sync_init_guest(struct tracecmd_time_sync *tsync, struct kvm_clock_sync *kvm) { const char *systems[] = {"ftrace", NULL}; struct clock_sync_context *clock_context; struct tep_event *raw; char *path; clock_context = (struct clock_sync_context *)tsync->context; path = tracefs_instance_get_dir(clock_context->instance); if (!path) goto error; kvm->tep = tracefs_local_events_system(path, systems); tracefs_put_tracing_file(path); if (!kvm->tep) goto 
error; raw = tep_find_event_by_name(kvm->tep, "ftrace", "raw_data"); if (!raw) goto error; kvm->raw_id = raw->id; tep_set_file_bigendian(kvm->tep, tracecmd_host_bigendian()); tep_set_local_bigendian(kvm->tep, tracecmd_host_bigendian()); path = tracefs_instance_get_file(clock_context->instance, "trace_marker_raw"); if (!path) goto error; kvm->marker_fd = open(path, O_WRONLY); tracefs_put_tracing_file(path); return 0; error: if (kvm->tep) tep_free(kvm->tep); if (kvm->marker_fd >= 0) close(kvm->marker_fd); return -1; } static int kvm_clock_sync_init(struct tracecmd_time_sync *tsync) { struct clock_sync_context *clock_context; struct kvm_clock_sync *kvm; int ret; if (!tsync || !tsync->context) return -1; clock_context = (struct clock_sync_context *)tsync->context; if (!kvm_support_check(clock_context->is_guest)) return -1; kvm = calloc(1, sizeof(struct kvm_clock_sync)); if (!kvm) return -1; kvm->marker_fd = -1; if (clock_context->is_guest) ret = kvm_clock_sync_init_guest(tsync, kvm); else ret = kvm_clock_sync_init_host(tsync, kvm); if (ret < 0) goto error; clock_context->proto_data = kvm; return 0; error: free(kvm); return -1; } static int kvm_clock_sync_free(struct tracecmd_time_sync *tsync) { struct clock_sync_context *clock_context; struct kvm_clock_sync *kvm = NULL; int i; clock_context = (struct clock_sync_context *)tsync->context; if (clock_context) kvm = (struct kvm_clock_sync *)clock_context->proto_data; if (kvm) { for (i = 0; i < kvm->vcpu_count; i++) { free(kvm->clock_files[i].offsets); free(kvm->clock_files[i].scalings); free(kvm->clock_files[i].frac); } if (kvm->tep) tep_free(kvm->tep); if (kvm->marker_fd >= 0) close(kvm->marker_fd); free(kvm); } return -1; } static int kvm_clock_host(struct tracecmd_time_sync *tsync, long long *offset, long long *scaling, long long *frac, long long *timestamp, unsigned int cpu) { char sync_proto[TRACECMD_TSYNC_PNAME_LENGTH]; struct clock_sync_context *clock_context; struct kvm_clock_offset_msg packet; struct kvm_clock_sync 
*kvm = NULL; long long kvm_scaling = 1; unsigned int sync_msg; long long kvm_offset; long long kvm_frac = 0; unsigned int size; char *msg; int ret; clock_context = (struct clock_sync_context *)tsync->context; if (clock_context) kvm = (struct kvm_clock_sync *)clock_context->proto_data; if (!kvm || !kvm->clock_files || !kvm->clock_files[0].offsets) return -1; if (cpu >= kvm->vcpu_count) return -1; ret = read_ll_from_file(kvm->clock_files[cpu].offsets, &kvm_offset); if (ret < 0) return -1; if (kvm->clock_files[cpu].scalings) { read_ll_from_file(kvm->clock_files[cpu].scalings, &kvm_scaling); if (kvm_scaling == KVM_SCALING_AMD_DEFAULT || kvm_scaling == KVM_SCALING_INTEL_DEFAULT) kvm_scaling = 1; } if (kvm->clock_files[cpu].frac && kvm_scaling != 1) ret = read_ll_from_file(kvm->clock_files[cpu].frac, &kvm_frac); msg = (char *)&packet; size = sizeof(packet); ret = tracecmd_msg_recv_time_sync(tsync->msg_handle, sync_proto, &sync_msg, &size, &msg); if (ret || strncmp(sync_proto, KVM_NAME, TRACECMD_TSYNC_PNAME_LENGTH) || sync_msg != KVM_SYNC_PKT_REQUEST) return -1; packet.offset = -kvm_offset; packet.scaling = kvm_scaling; packet.frac = kvm_frac; ret = tracecmd_msg_send_time_sync(tsync->msg_handle, KVM_NAME, KVM_SYNC_PKT_RESPONSE, sizeof(packet), (char *)&packet); if (ret) return -1; *scaling = packet.scaling; *offset = packet.offset; *frac = kvm_frac; *timestamp = packet.ts; return 0; } #define KVM_EVENT_MARKER "kvm sync event" static int kvm_marker_find(struct tep_event *event, struct tep_record *record, int cpu, void *context) { struct kvm_clock_sync *kvm = (struct kvm_clock_sync *)context; struct tep_format_field *field; struct tep_format_field *id; char *marker; /* Make sure this is our event */ if (event->id != kvm->raw_id) return 0; id = tep_find_field(event, "id"); field = tep_find_field(event, "buf"); if (field && id && record->size >= (id->offset + strlen(KVM_EVENT_MARKER) + 1)) { marker = (char *)(record->data + id->offset); if (!strcmp(marker, KVM_EVENT_MARKER)) 
{ kvm->ts = record->ts; return 1; } } return 0; } static int kvm_clock_guest(struct tracecmd_time_sync *tsync, long long *offset, long long *scaling, long long *frac, long long *timestamp) { char sync_proto[TRACECMD_TSYNC_PNAME_LENGTH]; struct clock_sync_context *clock_context; struct kvm_clock_offset_msg packet; struct kvm_clock_sync *kvm = NULL; unsigned int sync_msg; unsigned int size; char *msg; int ret; clock_context = (struct clock_sync_context *)tsync->context; if (clock_context) kvm = (struct kvm_clock_sync *)clock_context->proto_data; if (!kvm) return -1; kvm->ts = 0; memset(&packet, 0, sizeof(packet)); tracefs_instance_file_write(clock_context->instance, "trace", "\0"); write(kvm->marker_fd, KVM_EVENT_MARKER, strlen(KVM_EVENT_MARKER) + 1); kvm->ts = 0; tracefs_iterate_raw_events(kvm->tep, clock_context->instance, NULL, 0, kvm_marker_find, kvm); packet.ts = kvm->ts; ret = tracecmd_msg_send_time_sync(tsync->msg_handle, KVM_NAME, KVM_SYNC_PKT_REQUEST, sizeof(packet), (char *)&packet); if (ret) return -1; msg = (char *)&packet; size = sizeof(packet); ret = tracecmd_msg_recv_time_sync(tsync->msg_handle, sync_proto, &sync_msg, &size, &msg); if (ret || strncmp(sync_proto, KVM_NAME, TRACECMD_TSYNC_PNAME_LENGTH) || sync_msg != KVM_SYNC_PKT_RESPONSE) return -1; *scaling = packet.scaling; *offset = packet.offset; *frac = packet.frac; *timestamp = packet.ts; return 0; } static int kvm_clock_sync_calc(struct tracecmd_time_sync *tsync, long long *offset, long long *scaling, long long *frac, long long *timestamp, unsigned int cpu) { struct clock_sync_context *clock_context; int ret; if (!tsync || !tsync->context) return -1; clock_context = (struct clock_sync_context *)tsync->context; if (clock_context->is_guest) ret = kvm_clock_guest(tsync, offset, scaling, frac, timestamp); else ret = kvm_clock_host(tsync, offset, scaling, frac, timestamp, cpu); return ret; } int kvm_clock_sync_register(void) { int role = TRACECMD_TIME_SYNC_ROLE_GUEST; int clock = 0; if 
(kvm_support_check(false)) { role |= TRACECMD_TIME_SYNC_ROLE_HOST; clock = TRACECMD_CLOCK_X86_TSC; } return tracecmd_tsync_proto_register(KVM_NAME, KVM_ACCURACY, role, clock, 0, kvm_clock_sync_init, kvm_clock_sync_free, kvm_clock_sync_calc); } int kvm_clock_sync_unregister(void) { return tracecmd_tsync_proto_unregister(KVM_NAME); } trace-cmd-v3.3.1/lib/trace-cmd/trace-timesync-ptp.c000066400000000000000000000450741470231550600220610ustar00rootroot00000000000000// SPDX-License-Identifier: LGPL-2.1 /* * Copyright (C) 2019, VMware, Tzvetomir Stoyanov tz.stoyanov@gmail.com> * */ #include #include #include #include #include #include #include #include #include #include "trace-cmd.h" #include "trace-cmd-private.h" #include "tracefs.h" #include "trace-tsync-local.h" #include "trace-msg.h" #include "trace-cmd-local.h" typedef __be32 be32; typedef __u64 u64; typedef __s64 s64; #define PTP_SYNC_LOOP 339 #define PTP_SYNC_PKT_START 1 #define PTP_SYNC_PKT_PROBE 2 #define PTP_SYNC_PKT_PROBES 3 #define PTP_SYNC_PKT_OFFSET 4 #define PTP_SYNC_PKT_END 5 /* print time sync debug messages */ /* #define TSYNC_DEBUG */ struct ptp_clock_sync { struct tep_handle *tep; struct tep_format_field *id; int raw_id; int marker_fd; int series_id; int flags; int debug_fd; }; enum { /* * Consider only the probe with fastest response time, * otherwise make a histogram from all probes. */ PTP_FLAG_FASTEST_RESPONSE = (1 << 0), /* * Use trace marker to get the clock, * otherwise use the system clock directly. 
*/ PTP_FLAG_USE_MARKER = (1 << 1), }; static int ptp_flags = PTP_FLAG_FASTEST_RESPONSE | PTP_FLAG_USE_MARKER; /* * Calculated using formula [CPU rate]*[calculated offset deviation] * tested on 3GHz CPU, with x86-tsc trace clock and compare the calculated * offset with /sys/kernel/debug/kvm//vcpu0/tsc-offset * measured 2000ns deviation * using PTP flags PTP_FLAG_FASTEST_RESPONSE | PTP_FLAG_USE_MARKER */ #define PTP_ACCURACY 6000 #define PTP_NAME "ptp" struct ptp_clock_start_msg { be32 series_id; be32 flags; } __packed; struct ptp_clock_sample { s64 ts; be32 id; } __packed; struct ptp_clock_result_msg { be32 series_id; be32 count; struct ptp_clock_sample samples[2*PTP_SYNC_LOOP]; } __packed; struct ptp_clock_offset_msg { s64 ts; s64 offset; }; struct ptp_markers_context { struct clock_sync_context *clock; struct ptp_clock_sync *ptp; struct ptp_clock_result_msg msg; int size; }; struct ptp_marker_buf { int local_id; int remote_id; int count; int packet_id; } __packed; struct ptp_marker { int series_id; struct ptp_marker_buf data; } __packed; static int ptp_clock_sync_init(struct tracecmd_time_sync *tsync) { const char *systems[] = {"ftrace", NULL}; struct clock_sync_context *clock_context; struct ptp_clock_sync *ptp; struct tep_event *raw; char *path; if (!tsync || !tsync->context) return -1; clock_context = (struct clock_sync_context *)tsync->context; if (clock_context->proto_data) return 0; ptp = calloc(1, sizeof(struct ptp_clock_sync)); if (!ptp) return -1; ptp->marker_fd = -1; ptp->debug_fd = -1; path = tracefs_instance_get_dir(clock_context->instance); if (!path) goto error; ptp->tep = tracefs_local_events_system(path, systems); tracefs_put_tracing_file(path); if (!ptp->tep) goto error; raw = tep_find_event_by_name(ptp->tep, "ftrace", "raw_data"); if (!raw) goto error; ptp->id = tep_find_field(raw, "id"); if (!ptp->id) goto error; ptp->raw_id = raw->id; tep_set_file_bigendian(ptp->tep, tracecmd_host_bigendian()); tep_set_local_bigendian(ptp->tep, 
tracecmd_host_bigendian()); path = tracefs_instance_get_file(clock_context->instance, "trace_marker_raw"); if (!path) goto error; ptp->marker_fd = open(path, O_WRONLY); tracefs_put_tracing_file(path); clock_context->proto_data = ptp; #ifdef TSYNC_DEBUG if (clock_context->is_server) { char buff[256]; int res_fd; sprintf(buff, "res-id%d.txt", clock_context->remote_id); res_fd = open(buff, O_CREAT|O_WRONLY|O_TRUNC, 0644); if (res_fd > 0) close(res_fd); } #endif return 0; error: if (ptp) { tep_free(ptp->tep); if (ptp->marker_fd >= 0) close(ptp->marker_fd); } free(ptp); return -1; } static int ptp_clock_sync_free(struct tracecmd_time_sync *tsync) { struct clock_sync_context *clock_context; struct ptp_clock_sync *ptp; if (!tsync || !tsync->context) return -1; clock_context = (struct clock_sync_context *)tsync->context; if (clock_context && clock_context->proto_data) { ptp = (struct ptp_clock_sync *)clock_context->proto_data; tep_free(ptp->tep); if (ptp->marker_fd >= 0) close(ptp->marker_fd); if (ptp->debug_fd >= 0) close(ptp->debug_fd); free(clock_context->proto_data); clock_context->proto_data = NULL; } return 0; } /* Save the timestamps of sent ('s') and returned ('r') probes in the * ctx->msg.samples[] array. Depending of the context (server or client), there * may be only returned probes, or both sent and returned probes. The returned * probes are saved first in the array, after them are the sent probes. * Depending of the context, the array can be with size: * [0 .. max data.count] - holds only returned probes * [0 .. 
2 * max data.count] - holds both returned and sent probes */ static void ptp_probe_store(struct ptp_markers_context *ctx, struct ptp_marker *marker, unsigned long long ts) { int index = -1; if (marker->data.packet_id == 'r' && marker->data.count <= ctx->size) { index = marker->data.count - 1; } else if (marker->data.packet_id == 's' && marker->data.count * 2 <= ctx->size){ index = ctx->size / 2 + marker->data.count - 1; } if (index >= 0) { ctx->msg.samples[index].id = marker->data.count; ctx->msg.samples[index].ts = ts; ctx->msg.count++; } } static int ptp_marker_find(struct tep_event *event, struct tep_record *record, int cpu, void *context) { struct ptp_markers_context *ctx; struct ptp_marker *marker; ctx = (struct ptp_markers_context *)context; /* Make sure this is our event */ if (event->id != ctx->ptp->raw_id || !ctx->ptp->id) return 0; if (record->size >= (ctx->ptp->id->offset + sizeof(struct ptp_marker))) { marker = (struct ptp_marker *)(record->data + ctx->ptp->id->offset); if (marker->data.local_id == ctx->clock->local_id && marker->data.remote_id == ctx->clock->remote_id && marker->series_id == ctx->ptp->series_id && marker->data.count) ptp_probe_store(ctx, marker, record->ts); } return 0; } static inline bool good_probe(struct ptp_clock_sample *server_sample, struct ptp_clock_sample *send_sample, struct ptp_clock_sample *client_sample, int *bad_probes) { if (server_sample->ts && send_sample->ts && client_sample->ts && server_sample->id == send_sample->id && server_sample->id == client_sample->id) return true; (*bad_probes)++; return false; } static int ptp_calc_offset_fastest(struct clock_sync_context *clock, struct ptp_clock_result_msg *server, struct ptp_clock_result_msg *client, long long *offset_ret, long long *ts_ret, int *bad_probes) { struct ptp_clock_sample *sample_send; long long delta_min = LLONG_MAX; long long offset = 0; long long delta = 0; long long ts = 0; int max_i; int i; *bad_probes = 0; sample_send = server->samples + (server->count / 
2); max_i = server->count / 2 < client->count ? server->count / 2 : client->count; for (i = 0; i < max_i; i++) { if (!good_probe(&server->samples[i], &sample_send[i], &client->samples[i], bad_probes)) continue; ts = (sample_send[i].ts + server->samples[i].ts) / 2; offset = client->samples[i].ts - ts; delta = server->samples[i].ts - sample_send[i].ts; if (delta_min > delta) { delta_min = delta; *offset_ret = offset; *ts_ret = ts; } #ifdef TSYNC_DEBUG { struct ptp_clock_sync *ptp; ptp = (struct ptp_clock_sync *)clock->proto_data; if (ptp && ptp->debug_fd > 0) { char buff[256]; sprintf(buff, "%lld %lld %lld\n", ts, client->samples[i].ts, offset); write(ptp->debug_fd, buff, strlen(buff)); } } #endif } return 0; } static int ptp_calc_offset_hist(struct clock_sync_context *clock, struct ptp_clock_result_msg *server, struct ptp_clock_result_msg *client, long long *offset_ret, long long *ts_ret, int *bad_probes) { struct ptp_clock_sample *sample_send; long long timestamps[PTP_SYNC_LOOP]; long long offsets[PTP_SYNC_LOOP]; long long offset_min = LLONG_MAX; long long offset_max = 0; int hist[PTP_SYNC_LOOP]; int ind, max = 0; long long bin; int i, k = 0; *bad_probes = 0; memset(hist, 0, sizeof(int) * PTP_SYNC_LOOP); sample_send = server->samples + (server->count / 2); for (i = 0; i * 2 < server->count && i < client->count; i++) { if (!good_probe(&server->samples[i], &sample_send[i], &client->samples[i], bad_probes)) continue; timestamps[k] = (sample_send[i].ts + server->samples[i].ts) / 2; offsets[k] = client->samples[i].ts - timestamps[k]; if (offset_max < llabs(offsets[k])) offset_max = llabs(offsets[k]); if (offset_min > llabs(offsets[k])) offset_min = llabs(offsets[k]); #ifdef TSYNC_DEBUG { struct ptp_clock_sync *ptp; ptp = (struct ptp_clock_sync *)clock->proto_data; if (ptp && ptp->debug_fd > 0) { char buff[256]; sprintf(buff, "%lld %lld %lld\n", timestamps[k], client->samples[i].ts, offsets[k]); write(ptp->debug_fd, buff, strlen(buff)); } } #endif k++; } bin = 
(offset_max - offset_min) / PTP_SYNC_LOOP; for (i = 0; i < k; i++) { ind = (llabs(offsets[i]) - offset_min) / bin; if (ind < PTP_SYNC_LOOP) { hist[ind]++; if (max < hist[ind]) { max = hist[ind]; *offset_ret = offsets[i]; *ts_ret = timestamps[i]; } } } return 0; } static void ntoh_ptp_results(struct ptp_clock_result_msg *msg) { int i; msg->count = ntohl(msg->count); for (i = 0; i < msg->count; i++) { msg->samples[i].id = ntohl(msg->samples[i].id); msg->samples[i].ts = ntohll(msg->samples[i].ts); } msg->series_id = ntohl(msg->series_id); } static void hton_ptp_results(struct ptp_clock_result_msg *msg) { int i; for (i = 0; i < msg->count; i++) { msg->samples[i].id = htonl(msg->samples[i].id); msg->samples[i].ts = htonll(msg->samples[i].ts); } msg->series_id = htonl(msg->series_id); msg->count = htonl(msg->count); } static inline void ptp_track_clock(struct ptp_markers_context *ctx, struct ptp_marker *marker) { if (ctx->ptp->flags & PTP_FLAG_USE_MARKER) { write(ctx->ptp->marker_fd, marker, sizeof(struct ptp_marker)); } else { struct timespec clock; unsigned long long ts; clock_gettime(CLOCK_MONOTONIC_RAW, &clock); ts = clock.tv_sec * 1000000000LL; ts += clock.tv_nsec; ptp_probe_store(ctx, marker, ts); } } static int ptp_clock_client(struct tracecmd_time_sync *tsync, long long *offset, long long *timestamp) { char sync_proto[TRACECMD_TSYNC_PNAME_LENGTH]; struct clock_sync_context *clock_context; struct ptp_clock_offset_msg res_offset; struct ptp_clock_start_msg start; struct ptp_markers_context ctx; struct ptp_clock_sync *ptp; struct ptp_marker marker; unsigned int sync_msg; unsigned int size; char *msg; int count; int ret; if (!tsync || !tsync->context || !tsync->msg_handle) return -1; clock_context = (struct clock_sync_context *)tsync->context; if (clock_context->proto_data == NULL) return -1; ptp = (struct ptp_clock_sync *)clock_context->proto_data; size = sizeof(start); msg = (char *)&start; ret = tracecmd_msg_recv_time_sync(tsync->msg_handle, sync_proto, &sync_msg, 
&size, &msg); if (ret || strncmp(sync_proto, PTP_NAME, TRACECMD_TSYNC_PNAME_LENGTH) || sync_msg != PTP_SYNC_PKT_START) return -1; ret = tracecmd_msg_send_time_sync(tsync->msg_handle, PTP_NAME, PTP_SYNC_PKT_START, sizeof(start), (char *)&start); marker.data.local_id = clock_context->local_id; marker.data.remote_id = clock_context->remote_id; marker.series_id = ntohl(start.series_id); marker.data.packet_id = 'r'; ptp->series_id = marker.series_id; ptp->flags = ntohl(start.flags); msg = (char *)&count; size = sizeof(count); ctx.msg.count = 0; ctx.size = PTP_SYNC_LOOP; ctx.ptp = ptp; ctx.clock = clock_context; ctx.msg.series_id = ptp->series_id; while (true) { count = 0; ret = tracecmd_msg_recv_time_sync(tsync->msg_handle, sync_proto, &sync_msg, &size, &msg); if (ret || strncmp(sync_proto, PTP_NAME, TRACECMD_TSYNC_PNAME_LENGTH) || sync_msg != PTP_SYNC_PKT_PROBE || !ntohl(count)) break; marker.data.count = ntohl(count); ptp_track_clock(&ctx, &marker); ret = tracecmd_msg_send_time_sync(tsync->msg_handle, PTP_NAME, PTP_SYNC_PKT_PROBE, sizeof(count), (char *)&count); if (ret) break; } if (strncmp(sync_proto, PTP_NAME, TRACECMD_TSYNC_PNAME_LENGTH) || sync_msg != PTP_SYNC_PKT_END) return -1; if (ptp->flags & PTP_FLAG_USE_MARKER) tracefs_iterate_raw_events(ptp->tep, clock_context->instance, NULL, 0, ptp_marker_find, &ctx); hton_ptp_results(&ctx.msg); ret = tracecmd_msg_send_time_sync(tsync->msg_handle, PTP_NAME, PTP_SYNC_PKT_PROBES, sizeof(ctx.msg), (char *)&ctx.msg); msg = (char *)&res_offset; size = sizeof(res_offset); ret = tracecmd_msg_recv_time_sync(tsync->msg_handle, sync_proto, &sync_msg, &size, (char **)&msg); if (ret || strncmp(sync_proto, PTP_NAME, TRACECMD_TSYNC_PNAME_LENGTH) || sync_msg != PTP_SYNC_PKT_OFFSET) return -1; *offset = ntohll(res_offset.offset); *timestamp = ntohll(res_offset.ts); return 0; } static int ptp_clock_server(struct tracecmd_time_sync *tsync, long long *offset, long long *timestamp) { char sync_proto[TRACECMD_TSYNC_PNAME_LENGTH]; struct 
ptp_clock_result_msg *results = NULL; struct clock_sync_context *clock_context; struct ptp_clock_offset_msg res_offset; struct ptp_clock_start_msg start; struct ptp_markers_context ctx; int sync_loop = PTP_SYNC_LOOP; struct ptp_clock_sync *ptp; struct ptp_marker marker; unsigned int sync_msg; unsigned int size; int bad_probes; int count = 1; int msg_count; int msg_ret; char *msg; int ret; if (!tsync || !tsync->context || !tsync->msg_handle) return -1; clock_context = (struct clock_sync_context *)tsync->context; if (clock_context->proto_data == NULL) return -1; ptp = (struct ptp_clock_sync *)clock_context->proto_data; ptp->flags = ptp_flags; memset(&start, 0, sizeof(start)); start.series_id = htonl(ptp->series_id + 1); start.flags = htonl(ptp->flags); ret = tracecmd_msg_send_time_sync(tsync->msg_handle, PTP_NAME, PTP_SYNC_PKT_START, sizeof(start), (char *)&start); if (!ret) ret = tracecmd_msg_recv_time_sync(tsync->msg_handle, sync_proto, &sync_msg, NULL, NULL); if (ret || strncmp(sync_proto, PTP_NAME, TRACECMD_TSYNC_PNAME_LENGTH) || sync_msg != PTP_SYNC_PKT_START) return -1; tracefs_instance_file_write(clock_context->instance, "trace", "\0"); ptp->series_id++; marker.data.local_id = clock_context->local_id; marker.data.remote_id = clock_context->remote_id; marker.series_id = ptp->series_id; msg = (char *)&msg_ret; size = sizeof(msg_ret); ctx.size = 2*PTP_SYNC_LOOP; ctx.ptp = ptp; ctx.clock = clock_context; ctx.msg.count = 0; ctx.msg.series_id = ptp->series_id; do { marker.data.count = count++; marker.data.packet_id = 's'; msg_count = htonl(marker.data.count); ptp_track_clock(&ctx, &marker); ret = tracecmd_msg_send_time_sync(tsync->msg_handle, PTP_NAME, PTP_SYNC_PKT_PROBE, sizeof(msg_count), (char *)&msg_count); if (!ret) ret = tracecmd_msg_recv_time_sync(tsync->msg_handle, sync_proto, &sync_msg, &size, &msg); marker.data.packet_id = 'r'; ptp_track_clock(&ctx, &marker); if (ret || strncmp(sync_proto, PTP_NAME, TRACECMD_TSYNC_PNAME_LENGTH) || sync_msg != 
PTP_SYNC_PKT_PROBE || ntohl(msg_ret) != marker.data.count) break; } while (--sync_loop); if (sync_loop) return -1; ret = tracecmd_msg_send_time_sync(tsync->msg_handle, PTP_NAME, PTP_SYNC_PKT_END, 0, NULL); size = 0; ret = tracecmd_msg_recv_time_sync(tsync->msg_handle, sync_proto, &sync_msg, &size, (char **)&results); if (ret || strncmp(sync_proto, PTP_NAME, TRACECMD_TSYNC_PNAME_LENGTH) || sync_msg != PTP_SYNC_PKT_PROBES || size == 0 || results == NULL) { free(results); return -1; } ntoh_ptp_results(results); if (ptp->flags & PTP_FLAG_USE_MARKER) tracefs_iterate_raw_events(ptp->tep, clock_context->instance, NULL, 0, ptp_marker_find, &ctx); if (ptp->flags & PTP_FLAG_FASTEST_RESPONSE) ptp_calc_offset_fastest(clock_context, &ctx.msg, results, offset, timestamp, &bad_probes); else ptp_calc_offset_hist(clock_context, &ctx.msg, results, offset, timestamp, &bad_probes); #ifdef TSYNC_DEBUG { char buff[256]; int res_fd; sprintf(buff, "res-id%d.txt", clock_context->remote_id); res_fd = open(buff, O_WRONLY|O_APPEND, 0644); if (res_fd > 0) { if (*offset && *timestamp) { sprintf(buff, "%d %lld %lld\n", ptp->series_id, *offset, *timestamp); write(res_fd, buff, strlen(buff)); } close(res_fd); } printf("\n calculated offset %d: %lld, %d probes, filtered out %d, PTP flags 0x%X\n\r", ptp->series_id, *offset, results->count, bad_probes, ptp->flags); if (ptp && ptp->debug_fd > 0) { sprintf(buff, "%lld %lld 0\n", *offset, *timestamp); write(ptp->debug_fd, buff, strlen(buff)); close(ptp->debug_fd); ptp->debug_fd = -1; } } #endif res_offset.offset = htonll(*offset); res_offset.ts = htonll(*timestamp); ret = tracecmd_msg_send_time_sync(tsync->msg_handle, PTP_NAME, PTP_SYNC_PKT_OFFSET, sizeof(res_offset), (char *)&res_offset); free(results); return 0; } static int ptp_clock_sync_calc(struct tracecmd_time_sync *tsync, long long *offset, long long *scaling, long long *frac, long long *timestamp, unsigned int cpu) { struct clock_sync_context *clock_context; int ret; if (!tsync || 
!tsync->context) return -1; clock_context = (struct clock_sync_context *)tsync->context; #ifdef TSYNC_DEBUG if (clock_context->is_server) { struct ptp_clock_sync *ptp; char buff[256]; ptp = (struct ptp_clock_sync *)clock_context->proto_data; if (ptp->debug_fd > 0) close(ptp->debug_fd); sprintf(buff, "s-id%d_%d.txt", clock_context->remote_id, ptp->series_id+1); ptp->debug_fd = open(buff, O_CREAT|O_WRONLY|O_TRUNC, 0644); } #endif if (scaling) *scaling = 1; if (frac) *frac = 0; if (clock_context->is_server) ret = ptp_clock_server(tsync, offset, timestamp); else ret = ptp_clock_client(tsync, offset, timestamp); return ret; } int ptp_clock_sync_register(void) { return tracecmd_tsync_proto_register(PTP_NAME, PTP_ACCURACY, TRACECMD_TIME_SYNC_ROLE_GUEST | TRACECMD_TIME_SYNC_ROLE_HOST | TRACECMD_TIME_SYNC_ROLE_CLIENT | TRACECMD_TIME_SYNC_ROLE_SERVER, 0, TRACECMD_TSYNC_FLAG_INTERPOLATE, ptp_clock_sync_init, ptp_clock_sync_free, ptp_clock_sync_calc); } int ptp_clock_sync_unregister(void) { return tracecmd_tsync_proto_unregister(PTP_NAME); } trace-cmd-v3.3.1/lib/trace-cmd/trace-timesync.c000066400000000000000000000635331470231550600212600ustar00rootroot00000000000000// SPDX-License-Identifier: LGPL-2.1 /* * Copyright (C) 2019, VMware, Tzvetomir Stoyanov * */ #include #include #include #include #include #include #include #include #include #include #include #include "trace-cmd-private.h" #include "trace-cmd-local.h" #include "tracefs.h" #include "event-utils.h" #include "trace-tsync-local.h" struct tsync_proto { struct tsync_proto *next; char proto_name[TRACECMD_TSYNC_PNAME_LENGTH]; enum tracecmd_time_sync_role roles; int accuracy; int supported_clocks; unsigned int flags; int (*clock_sync_init)(struct tracecmd_time_sync *clock_context); int (*clock_sync_free)(struct tracecmd_time_sync *clock_context); int (*clock_sync_calc)(struct tracecmd_time_sync *clock_context, long long *offset, long long *scaling, long long *frac, long long *timestamp, unsigned int cpu); }; struct 
tsync_probe_request_msg { unsigned short cpu; } __packed; static struct tsync_proto *tsync_proto_list; static struct tsync_proto *tsync_proto_find(const char *proto_name) { struct tsync_proto *proto; if (!proto_name) return NULL; for (proto = tsync_proto_list; proto; proto = proto->next) { if (strlen(proto->proto_name) == strlen(proto_name) && !strncmp(proto->proto_name, proto_name, TRACECMD_TSYNC_PNAME_LENGTH)) return proto; } return NULL; } /** * tracecmd_tsync_init - Initialize the global, per task, time sync data. */ void tracecmd_tsync_init(void) { ptp_clock_sync_register(); kvm_clock_sync_register(); } int tracecmd_tsync_proto_register(const char *proto_name, int accuracy, int roles, int supported_clocks, unsigned int flags, int (*init)(struct tracecmd_time_sync *), int (*free)(struct tracecmd_time_sync *), int (*calc)(struct tracecmd_time_sync *, long long *, long long *, long long *, long long *, unsigned int)) { struct tsync_proto *proto = NULL; if (tsync_proto_find(proto_name)) return -1; proto = calloc(1, sizeof(struct tsync_proto)); if (!proto) return -1; strncpy(proto->proto_name, proto_name, TRACECMD_TSYNC_PNAME_LENGTH); proto->accuracy = accuracy; proto->roles = roles; proto->flags = flags; proto->supported_clocks = supported_clocks; proto->clock_sync_init = init; proto->clock_sync_free = free; proto->clock_sync_calc = calc; proto->next = tsync_proto_list; tsync_proto_list = proto; return 0; } int tracecmd_tsync_proto_unregister(char *proto_name) { struct tsync_proto **last = &tsync_proto_list; if (!proto_name) return -1; for (; *last; last = &(*last)->next) { if (strlen((*last)->proto_name) == strlen(proto_name) && !strncmp((*last)->proto_name, proto_name, TRACECMD_TSYNC_PNAME_LENGTH)) { struct tsync_proto *proto = *last; *last = proto->next; free(proto); return 0; } } return -1; } bool __hidden tsync_proto_is_supported(const char *proto_name) { if (tsync_proto_find(proto_name)) return true; return false; } /** * tracecmd_tsync_get_offsets - Return 
the calculated time offsets * * @tsync: Pointer to time sync context * @cpu: CPU for which to get the calculated offsets * @count: Returns the number of calculated time offsets * @ts: Array of size @count containing timestamps of callculated offsets * @offsets: array of size @count, containing offsets for each timestamp * @scalings: array of size @count, containing scaling ratios for each timestamp * @frac: array of size @count, containing fraction bits for each timestamp * * Retuns -1 in case of an error, or 0 otherwise */ int tracecmd_tsync_get_offsets(struct tracecmd_time_sync *tsync, int cpu, int *count, long long **ts, long long **offsets, long long **scalings, long long **frac) { struct clock_sync_context *tsync_context; if (!tsync || !tsync->context) return -1; tsync_context = (struct clock_sync_context *)tsync->context; if (cpu >= tsync_context->cpu_count || !tsync_context->offsets) return -1; if (count) *count = tsync_context->offsets[cpu].sync_count; if (ts) *ts = tsync_context->offsets[cpu].sync_ts; if (offsets) *offsets = tsync_context->offsets[cpu].sync_offsets; if (scalings) *scalings = tsync_context->offsets[cpu].sync_scalings; if (frac) *frac = tsync_context->offsets[cpu].sync_frac; return 0; } /** * tsync_get_proto_flags - Get protocol flags * * @tsync: Pointer to time sync context * @flags: Returns the protocol flags, a combination of TRACECMD_TSYNC_FLAG_... 
* * Retuns -1 in case of an error, or 0 otherwise */ static int tsync_get_proto_flags(struct tracecmd_time_sync *tsync, unsigned int *flags) { struct tsync_proto *protocol; if (!tsync) return -1; protocol = tsync_proto_find(tsync->proto_name); if (!protocol) return -1; if (flags) *flags = protocol->flags; return 0; } #define PROTO_MASK_SIZE (sizeof(char)) #define PROTO_MASK_BITS (PROTO_MASK_SIZE * 8) /** * tsync_proto_select - Select time sync protocol, to be used for * timestamp synchronization with a peer * * @protos: list of tsync protocol names * @clock : trace clock * @role : local time sync role * * Retuns pointer to a protocol name, that can be used with the peer, or NULL * in case there is no match with supported protocols. * The returned string MUST NOT be freed by the caller */ static const char * tsync_proto_select(const struct tracecmd_tsync_protos *protos, const char *clock, enum tracecmd_time_sync_role role) { struct tsync_proto *selected = NULL; struct tsync_proto *proto; char **pname; int clock_id = 0; if (!protos) return NULL; clock_id = tracecmd_clock_str2id(clock); pname = protos->names; while (*pname) { for (proto = tsync_proto_list; proto; proto = proto->next) { if (!(proto->roles & role)) continue; if (proto->supported_clocks && clock_id && !(proto->supported_clocks & clock_id)) continue; if (strncmp(proto->proto_name, *pname, TRACECMD_TSYNC_PNAME_LENGTH)) continue; if (selected) { if (selected->accuracy > proto->accuracy) selected = proto; } else selected = proto; } pname++; } if (selected) return selected->proto_name; return NULL; } /** * tracecmd_tsync_get_proto - return the appropriate synchronization protocol * @protos: The list of synchronization protocols to choose from * @clock: The clock that is being used (or NULL for unknown). * * Retuns pointer to a protocol name, that can be used with the peer, or NULL * in case there is no match with supported protocols. 
* The returned string MUST NOT be freed by the caller */ __hidden const char * tracecmd_tsync_get_proto(const struct tracecmd_tsync_protos *protos, const char *clock, enum tracecmd_time_sync_role role) { return tsync_proto_select(protos, clock, role); } /** * tracecmd_tsync_proto_getall - Returns list of all supported * time sync protocols * @protos: return, allocated list of time sync protocol names, * supported by the peer. Must be freed by free() * @clock: selected trace clock * @role: supported protocol role * * If completed successfully 0 is returned and allocated list of strings in @protos. * The last list entry is NULL. In case of an error, -1 is returned. * @protos must be freed with free() */ int tracecmd_tsync_proto_getall(struct tracecmd_tsync_protos **protos, const char *clock, int role) { struct tracecmd_tsync_protos *plist = NULL; struct tsync_proto *proto; int clock_id = 0; int count = 1; int i; if (clock) clock_id = tracecmd_clock_str2id(clock); for (proto = tsync_proto_list; proto; proto = proto->next) { if (!(proto->roles & role)) continue; if (proto->supported_clocks && clock_id && !(proto->supported_clocks & clock_id)) continue; count++; } plist = calloc(1, sizeof(struct tracecmd_tsync_protos)); if (!plist) goto error; plist->names = calloc(count, sizeof(char *)); if (!plist->names) goto error; for (i = 0, proto = tsync_proto_list; proto && i < (count - 1); proto = proto->next) { if (!(proto->roles & role)) continue; if (proto->supported_clocks && clock_id && !(proto->supported_clocks & clock_id)) continue; plist->names[i++] = proto->proto_name; } *protos = plist; return 0; error: if (plist) { free(plist->names); free(plist); } return -1; } static int get_first_cpu(cpu_set_t **pin_mask, size_t *m_size) { int cpus = tracecmd_count_cpus(); cpu_set_t *cpu_mask; int mask_size; int i; cpu_mask = CPU_ALLOC(cpus); *pin_mask = CPU_ALLOC(cpus); if (!cpu_mask || !*pin_mask || 1) goto error; mask_size = CPU_ALLOC_SIZE(cpus); CPU_ZERO_S(mask_size, 
cpu_mask); CPU_ZERO_S(mask_size, *pin_mask); if (sched_getaffinity(0, mask_size, cpu_mask) == -1) goto error; for (i = 0; i < cpus; i++) { if (CPU_ISSET_S(i, mask_size, cpu_mask)) { CPU_SET_S(i, mask_size, *pin_mask); break; } } if (CPU_COUNT_S(mask_size, *pin_mask) < 1) goto error; CPU_FREE(cpu_mask); *m_size = mask_size; return 0; error: if (cpu_mask) CPU_FREE(cpu_mask); if (*pin_mask) CPU_FREE(*pin_mask); *pin_mask = NULL; *m_size = 0; return -1; } static struct tracefs_instance * clock_synch_create_instance(const char *clock, unsigned int cid) { struct tracefs_instance *instance; char inst_name[256]; snprintf(inst_name, 256, "clock_synch-%d", cid); instance = tracefs_instance_create(inst_name); if (!instance) return NULL; tracefs_instance_file_write(instance, "trace", "\0"); if (clock) tracefs_instance_file_write(instance, "trace_clock", clock); return instance; } static void clock_synch_delete_instance(struct tracefs_instance *inst) { if (!inst) return; tracefs_instance_destroy(inst); tracefs_instance_free(inst); } static int clock_context_init(struct tracecmd_time_sync *tsync, bool guest) { struct clock_sync_context *clock = NULL; struct tsync_proto *protocol; if (tsync->context) return 0; protocol = tsync_proto_find(tsync->proto_name); if (!protocol || !protocol->clock_sync_calc) return -1; clock = calloc(1, sizeof(struct clock_sync_context)); if (!clock) return -1; clock->is_guest = guest; clock->is_server = clock->is_guest; clock->instance = clock_synch_create_instance(tsync->clock_str, tsync->remote_id); if (!clock->instance) goto error; clock->cpu_count = tsync->vcpu_count; if (clock->cpu_count) { clock->offsets = calloc(clock->cpu_count, sizeof(struct clock_sync_offsets)); if (!clock->offsets) goto error; } tsync->context = clock; if (protocol->clock_sync_init && protocol->clock_sync_init(tsync) < 0) goto error; tsync->proto = protocol; return 0; error: tsync->context = NULL; if (clock->instance) clock_synch_delete_instance(clock->instance); 
free(clock->offsets); free(clock); return -1; } /** * tracecmd_tsync_free - Free time sync context, allocated by * tracecmd_tsync_with_host() or tracecmd_tsync_with_guest() APIs * * @tsync: Pointer to time sync context * */ void tracecmd_tsync_free(struct tracecmd_time_sync *tsync) { struct clock_sync_context *tsync_context; struct tsync_proto *proto; int i; if (!tsync) return; tsync_context = (struct clock_sync_context *)tsync->context; proto = tsync_proto_find(tsync->proto_name); if (proto && proto->clock_sync_free) proto->clock_sync_free(tsync); if (tsync_context) { clock_synch_delete_instance(tsync_context->instance); tsync_context->instance = NULL; if (tsync_context->cpu_count && tsync_context->offsets) { for (i = 0; i < tsync_context->cpu_count; i++) { free(tsync_context->offsets[i].sync_ts); free(tsync_context->offsets[i].sync_offsets); free(tsync_context->offsets[i].sync_scalings); free(tsync_context->offsets[i].sync_frac); tsync_context->offsets[i].sync_ts = NULL; tsync_context->offsets[i].sync_offsets = NULL; tsync_context->offsets[i].sync_scalings = NULL; tsync_context->offsets[i].sync_frac = NULL; tsync_context->offsets[i].sync_count = 0; tsync_context->offsets[i].sync_size = 0; } free(tsync_context->offsets); tsync_context->offsets = NULL; } } if (tsync->msg_handle) tracecmd_msg_handle_close(tsync->msg_handle); /* These are only created from the host */ if (tsync->guest_pid) { pthread_mutex_destroy(&tsync->lock); pthread_cond_destroy(&tsync->cond); pthread_barrier_destroy(&tsync->first_sync); } free(tsync->clock_str); free(tsync->proto_name); free(tsync); } static cpu_set_t *pin_to_cpu(int cpu) { static size_t size; static int cpus; cpu_set_t *mask = NULL; cpu_set_t *old = NULL; if (!cpus) { cpus = tracecmd_count_cpus(); size = CPU_ALLOC_SIZE(cpus); } if (cpu >= cpus) goto error; mask = CPU_ALLOC(cpus); if (!mask) goto error; old = CPU_ALLOC(cpus); if (!old) goto error; CPU_ZERO_S(size, mask); CPU_SET_S(cpu, size, mask); if 
(pthread_getaffinity_np(pthread_self(), size, old)) goto error; if (pthread_setaffinity_np(pthread_self(), size, mask)) goto error; CPU_FREE(mask); return old; error: if (mask) CPU_FREE(mask); if (old) CPU_FREE(old); return NULL; } static void restore_pin_to_cpu(cpu_set_t *mask) { static size_t size; if (!size) size = CPU_ALLOC_SIZE(tracecmd_count_cpus()); pthread_setaffinity_np(pthread_self(), size, mask); CPU_FREE(mask); } static int tsync_send(struct tracecmd_time_sync *tsync, unsigned int cpu) { struct tsync_proto *proto = tsync->proto; cpu_set_t *old_set = NULL; long long timestamp = 0; long long scaling = 0; long long offset = 0; long long frac = 0; int ret; old_set = pin_to_cpu(cpu); ret = proto->clock_sync_calc(tsync, &offset, &scaling, &frac, ×tamp, cpu); if (old_set) restore_pin_to_cpu(old_set); return ret; } static void tsync_with_host(struct tracecmd_time_sync *tsync) { char protocol[TRACECMD_TSYNC_PNAME_LENGTH]; struct tsync_probe_request_msg probe; unsigned int command; unsigned int size; char *msg; int ret; msg = (char *)&probe; size = sizeof(probe); while (true) { memset(&probe, 0, size); ret = tracecmd_msg_recv_time_sync(tsync->msg_handle, protocol, &command, &size, &msg); if (ret || strncmp(protocol, TRACECMD_TSYNC_PROTO_NONE, TRACECMD_TSYNC_PNAME_LENGTH) || command != TRACECMD_TIME_SYNC_CMD_PROBE) break; ret = tsync_send(tsync, probe.cpu); if (ret) break; } } static int record_sync_sample(struct clock_sync_offsets *offsets, int array_step, long long offset, long long scaling, long long frac, long long ts) { long long *sync_scalings = NULL; long long *sync_offsets = NULL; long long *sync_frac = NULL; long long *sync_ts = NULL; if (offsets->sync_count >= offsets->sync_size) { sync_ts = realloc(offsets->sync_ts, (offsets->sync_size + array_step) * sizeof(long long)); sync_offsets = realloc(offsets->sync_offsets, (offsets->sync_size + array_step) * sizeof(long long)); sync_scalings = realloc(offsets->sync_scalings, (offsets->sync_size + array_step) * 
sizeof(long long)); sync_frac = realloc(offsets->sync_frac, (offsets->sync_size + array_step) * sizeof(long long)); if (!sync_ts || !sync_offsets || !sync_scalings || !sync_frac) { free(sync_ts); free(sync_offsets); free(sync_scalings); free(sync_frac); return -1; } offsets->sync_size += array_step; offsets->sync_ts = sync_ts; offsets->sync_offsets = sync_offsets; offsets->sync_scalings = sync_scalings; offsets->sync_frac = sync_frac; } offsets->sync_ts[offsets->sync_count] = ts; offsets->sync_offsets[offsets->sync_count] = offset; offsets->sync_scalings[offsets->sync_count] = scaling; offsets->sync_frac[offsets->sync_count] = frac; offsets->sync_count++; return 0; } static int tsync_get_sample(struct tracecmd_time_sync *tsync, unsigned int cpu, int array_step) { struct tsync_proto *proto = tsync->proto; struct clock_sync_context *clock; long long timestamp = 0; long long scaling = 0; long long offset = 0; long long frac = 0; int ret; ret = proto->clock_sync_calc(tsync, &offset, &scaling, &frac, ×tamp, cpu); if (ret) { tracecmd_warning("Failed to synchronize timestamps with guest"); return -1; } if (!offset || !timestamp || !scaling) return 0; clock = tsync->context; if (!clock || cpu >= clock->cpu_count || !clock->offsets) return -1; return record_sync_sample(&clock->offsets[cpu], array_step, offset, scaling, frac, timestamp); } #define TIMER_SEC_NANO 1000000000LL static inline void get_ts_loop_delay(struct timespec *timeout, int delay_ms) { memset(timeout, 0, sizeof(struct timespec)); clock_gettime(CLOCK_REALTIME, timeout); timeout->tv_nsec += ((unsigned long long)delay_ms * 1000000LL); if (timeout->tv_nsec >= TIMER_SEC_NANO) { timeout->tv_sec += timeout->tv_nsec / TIMER_SEC_NANO; timeout->tv_nsec %= TIMER_SEC_NANO; } } #define CLOCK_TS_ARRAY 5 static int tsync_with_guest(struct tracecmd_time_sync *tsync) { struct tsync_probe_request_msg probe; int ts_array_size = CLOCK_TS_ARRAY; struct timespec timeout; bool first = true; bool end = false; int ret; int i; if 
(tsync->loop_interval > 0 && tsync->loop_interval < (CLOCK_TS_ARRAY * 1000)) ts_array_size = (CLOCK_TS_ARRAY * 1000) / tsync->loop_interval; while (true) { pthread_mutex_lock(&tsync->lock); for (i = 0; i < tsync->vcpu_count; i++) { probe.cpu = i; ret = tracecmd_msg_send_time_sync(tsync->msg_handle, TRACECMD_TSYNC_PROTO_NONE, TRACECMD_TIME_SYNC_CMD_PROBE, sizeof(probe), (char *)&probe); ret = tsync_get_sample(tsync, i, ts_array_size); if (ret) break; } if (first) { first = false; pthread_barrier_wait(&tsync->first_sync); } if (end || i < tsync->vcpu_count) { pthread_mutex_unlock(&tsync->lock); break; } if (tsync->loop_interval > 0) { get_ts_loop_delay(&timeout, tsync->loop_interval); ret = pthread_cond_timedwait(&tsync->cond, &tsync->lock, &timeout); pthread_mutex_unlock(&tsync->lock); if (ret && ret != ETIMEDOUT) break; else if (!ret) end = true; } else { pthread_cond_wait(&tsync->cond, &tsync->lock); end = true; pthread_mutex_unlock(&tsync->lock); } }; tracecmd_msg_send_time_sync(tsync->msg_handle, TRACECMD_TSYNC_PROTO_NONE, TRACECMD_TIME_SYNC_CMD_STOP, 0, NULL); return 0; } static void *tsync_host_thread(void *data) { struct tracecmd_time_sync *tsync = data; tsync_with_guest(tsync); pthread_exit(0); } /** * tracecmd_tsync_with_guest - Synchronize timestamps with guest * * @trace_id: Local ID for the current trace session * @fd: file descriptor of guest * @guest_pid: PID of the host OS process, running the guest * @guest_cpus: Number of the guest VCPUs * @proto_name: Name of the negotiated time synchronization protocol * @clock: Trace clock, used for that session * * On success, a pointer to time sync context is returned, or NULL in * case of an error. The context must be freed with tracecmd_tsync_free() * * This API spawns a pthread, which performs time stamps synchronization * until tracecmd_tsync_with_guest_stop() is called. 
*/
struct tracecmd_time_sync *
tracecmd_tsync_with_guest(unsigned long long trace_id, int loop_interval,
			  unsigned int fd, int guest_pid,
			  int guest_cpus, const char *proto_name, const char *clock)
{
	struct tracecmd_time_sync *tsync;
	cpu_set_t *pin_mask = NULL;
	bool sync_init = false;
	pthread_attr_t attrib;
	size_t mask_size = 0;
	int ret;

	if (!proto_name)
		return NULL;

	tsync = calloc(1, sizeof(*tsync));
	if (!tsync)
		return NULL;

	tsync->trace_id = trace_id;
	tsync->loop_interval = loop_interval;
	tsync->guest_pid = guest_pid;
	tsync->vcpu_count = guest_cpus;

	tsync->msg_handle = tracecmd_msg_handle_alloc(fd, 0);
	if (!tsync->msg_handle)
		goto error;

	tsync->proto_name = strdup(proto_name);
	if (!tsync->proto_name)
		goto error;

	if (clock) {
		tsync->clock_str = strdup(clock);
		if (!tsync->clock_str)
			goto error;
	}

	pthread_mutex_init(&tsync->lock, NULL);
	pthread_cond_init(&tsync->cond, NULL);
	/* Two parties: this function and the thread it spawns */
	pthread_barrier_init(&tsync->first_sync, NULL, 2);
	pthread_attr_init(&attrib);
	pthread_attr_setdetachstate(&attrib, PTHREAD_CREATE_JOINABLE);
	sync_init = true;

	clock_context_init(tsync, false);
	if (!tsync->context)
		goto error;

	ret = pthread_create(&tsync->thread, &attrib, tsync_host_thread, tsync);
	if (ret) {
		/*
		 * NOTE(review): tsync->context was set up by clock_context_init()
		 * and is not released on this path; its destructor is not visible
		 * from here - please confirm and release it.
		 */
		goto error;
	}
	tsync->thread_running = true;

	if (!get_first_cpu(&pin_mask, &mask_size))
		pthread_setaffinity_np(tsync->thread, mask_size, pin_mask);
	/* Do not return before the thread has completed its first sync round */
	pthread_barrier_wait(&tsync->first_sync);

	if (pin_mask)
		CPU_FREE(pin_mask);
	pthread_attr_destroy(&attrib);

	return tsync;

error:
	/* No thread exists on any error path, the primitives are unused */
	if (sync_init) {
		pthread_attr_destroy(&attrib);
		pthread_barrier_destroy(&tsync->first_sync);
		pthread_cond_destroy(&tsync->cond);
		pthread_mutex_destroy(&tsync->lock);
	}
	if (tsync->msg_handle)
		tracecmd_msg_handle_close(tsync->msg_handle);
	else if ((int)fd >= 0)
		close(fd);
	free(tsync->clock_str);
	free(tsync->proto_name);
	free(tsync);

	return NULL;
}

/**
 * tracecmd_write_guest_time_shift - Write collected timestamp corrections in a file
 *
 * @handle: Handle to a trace file, where timestamp corrections will be saved
 * @tsync: Time sync context with collected timestamp corrections
 *
 * Returns 0 on success, or -1 in case of an error.
 *
 * This API writes collected timestamp corrections in the metadata of the
 * trace file, as TRACECMD_OPTION_TIME_SHIFT option.
*/ int tracecmd_write_guest_time_shift(struct tracecmd_output *handle, struct tracecmd_time_sync *tsync) { struct iovec *vector = NULL; unsigned int flags; long long *scalings = NULL; long long *offsets = NULL; long long *frac = NULL; long long *ts = NULL; int vcount; int count; int i, j; int ret = -1; if (!tsync || !tsync->vcpu_count) return -1; vcount = 3 + (5 * tsync->vcpu_count); vector = calloc(vcount, sizeof(struct iovec)); if (!vector) return -1; ret = tsync_get_proto_flags(tsync, &flags); if (ret < 0) goto out; j = 0; vector[j].iov_len = 8; vector[j++].iov_base = &tsync->trace_id; vector[j].iov_len = 4; vector[j++].iov_base = &flags; vector[j].iov_len = 4; vector[j++].iov_base = &tsync->vcpu_count; for (i = 0; i < tsync->vcpu_count; i++) { if (j >= vcount) break; ret = tracecmd_tsync_get_offsets(tsync, i, &count, &ts, &offsets, &scalings, NULL); if (ret < 0 || !count || !ts || !offsets || !scalings) break; vector[j].iov_len = 4; vector[j++].iov_base = &count; vector[j].iov_len = 8 * count; vector[j++].iov_base = ts; vector[j].iov_len = 8 * count; vector[j++].iov_base = offsets; vector[j].iov_len = 8 * count; vector[j++].iov_base = scalings; } if (i < tsync->vcpu_count) { ret = -1; goto out; } /* * Writing fraction bits into the option is implemented in a separate loop for * backward compatibility. In the trace-cmd 2.9 release, this option has only offset * and scaling. That legacy code must work with the new extended option. 
* */ for (i = 0; i < tsync->vcpu_count; i++) { if (j >= vcount) break; ret = tracecmd_tsync_get_offsets(tsync, i, NULL, NULL, NULL, NULL, &frac); if (ret < 0) break; vector[j].iov_len = 8 * count; vector[j++].iov_base = frac; } if (i < tsync->vcpu_count) { ret = -1; goto out; } tracecmd_add_option_v(handle, TRACECMD_OPTION_TIME_SHIFT, vector, vcount); #ifdef TSYNC_DEBUG if (count > 1) printf("Got %d timestamp synch samples in %lld ns trace\n\r", count, ts[count - 1] - ts[0]); #endif ret = 0; out: free(vector); return ret; } /** * tracecmd_tsync_with_guest_stop - Stop the time sync session with a guest * * @tsync: Time sync context, representing a running time sync session * * Returns 0 on success, or -1 in case of an error. * */ int tracecmd_tsync_with_guest_stop(struct tracecmd_time_sync *tsync) { if (!tsync || !tsync->thread_running) return -1; /* Signal the time synchronization thread to complete and wait for it */ pthread_mutex_lock(&tsync->lock); pthread_cond_signal(&tsync->cond); pthread_mutex_unlock(&tsync->lock); pthread_join(tsync->thread, NULL); return 0; } static void *tsync_agent_thread(void *data) { struct tracecmd_time_sync *tsync = data; tsync_with_host(tsync); pthread_exit(NULL); } /** * tracecmd_tsync_with_host - Synchronize timestamps with host * @fd: File descriptor connecting with the host * @proto: The selected protocol * @clock: Trace clock, used for that session * @port: returned, VSOCKET port, on which the guest listens for tsync requests * @remote_id: Identifier to uniquely identify the remote host * @local_id: Identifier to uniquely identify the local machine * * On success, a pointer to time sync context is returned, or NULL in * case of an error. The context must be freed with tracecmd_tsync_free() * * This API spawns a pthread, which performs time stamps synchronization * until tracecmd_tsync_with_host_stop() is called. 
*/
struct tracecmd_time_sync *
tracecmd_tsync_with_host(int fd, const char *proto, const char *clock,
			 int remote_id, int local_id)
{
	struct tracecmd_time_sync *tsync;
	cpu_set_t *pin_mask = NULL;
	pthread_attr_t attrib;
	size_t mask_size = 0;
	int ret;

	/* strdup() of a NULL pointer is undefined */
	if (!proto)
		return NULL;

	tsync = calloc(1, sizeof(struct tracecmd_time_sync));
	if (!tsync)
		return NULL;

	tsync->remote_id = remote_id;
	tsync->local_id = local_id;
	tsync->vcpu_count = tracecmd_count_cpus();

	tsync->proto_name = strdup(proto);
	if (!tsync->proto_name)
		goto error;

	/* The sync thread cannot talk to the host without a message handle */
	tsync->msg_handle = tracecmd_msg_handle_alloc(fd, 0);
	if (!tsync->msg_handle)
		goto error;

	if (clock) {
		tsync->clock_str = strdup(clock);
		if (!tsync->clock_str)
			goto error;
	}

	pthread_attr_init(&attrib);
	pthread_attr_setdetachstate(&attrib, PTHREAD_CREATE_JOINABLE);

	clock_context_init(tsync, true);
	if (!tsync->context)
		goto error_attr;

	ret = pthread_create(&tsync->thread, &attrib, tsync_agent_thread, tsync);
	if (ret)
		goto error_attr;
	tsync->thread_running = true;
	if (!get_first_cpu(&pin_mask, &mask_size))
		pthread_setaffinity_np(tsync->thread, mask_size, pin_mask);

	if (pin_mask)
		CPU_FREE(pin_mask);
	pthread_attr_destroy(&attrib);
	return tsync;

error_attr:
	pthread_attr_destroy(&attrib);
error:
	if (tsync->msg_handle) {
		/* Do not close the fd that was passed it */
		tsync->msg_handle->fd = -1;
		tracecmd_msg_handle_close(tsync->msg_handle);
	}
	free(tsync->clock_str);
	free(tsync->proto_name);
	free(tsync);

	return NULL;
}

/**
 * tracecmd_tsync_with_host_stop - Stop the time sync session with a host
 *
 * @tsync: Time sync context, representing a running time sync session
 *
 * Returns 0 on success, or error number in case of an error.
* */ int tracecmd_tsync_with_host_stop(struct tracecmd_time_sync *tsync) { return pthread_join(tsync->thread, NULL); } trace-cmd-v3.3.1/lib/trace-cmd/trace-util.c000066400000000000000000000341421470231550600203740ustar00rootroot00000000000000// SPDX-License-Identifier: LGPL-2.1 /* * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "trace-cmd-private.h" #include "trace-cmd-local.h" #define LOCAL_PLUGIN_DIR ".trace-cmd/plugins" #define PROC_STACK_FILE "/proc/sys/kernel/stack_tracer_enabled" static bool debug; static bool notimeout; static int log_level = TEP_LOG_WARNING; static FILE *logfp; const static struct { const char *clock_str; enum tracecmd_clocks clock_id; } trace_clocks[] = { {"local", TRACECMD_CLOCK_LOCAL}, {"global", TRACECMD_CLOCK_GLOBAL}, {"counter", TRACECMD_CLOCK_COUNTER}, {"uptime", TRACECMD_CLOCK_UPTIME}, {"perf", TRACECMD_CLOCK_PERF}, {"mono", TRACECMD_CLOCK_MONO}, {"mono_raw", TRACECMD_CLOCK_MONO_RAW}, {"boot", TRACECMD_CLOCK_BOOT}, {"x86-tsc", TRACECMD_CLOCK_X86_TSC}, {NULL, -1} }; /** * tracecmd_clock_str2id - Convert ftrace clock name to clock ID * @clock: Ftrace clock name * Returns ID of the ftrace clock */ enum tracecmd_clocks tracecmd_clock_str2id(const char *clock) { int i; if (!clock) return TRACECMD_CLOCK_UNKNOWN; for (i = 0; trace_clocks[i].clock_str; i++) { if (!strncmp(clock, trace_clocks[i].clock_str, strlen(trace_clocks[i].clock_str))) return trace_clocks[i].clock_id; } return TRACECMD_CLOCK_UNKNOWN; } /** * tracecmd_clock_id2str - Convert clock ID to ftare clock name * @clock: Clock ID * Returns name of a ftrace clock */ const char *tracecmd_clock_id2str(enum tracecmd_clocks clock) { int i; for (i = 0; trace_clocks[i].clock_str; i++) { if (trace_clocks[i].clock_id == clock) return trace_clocks[i].clock_str; } return NULL; } /** * 
tracecmd_set_debug - Switch the debug mode of the tracecmd library
 * @set_debug: True to put the library in "debug" mode, false to leave it
 *
 * The log level follows the mode: TEP_LOG_DEBUG when debug is switched on,
 * TEP_LOG_CRITICAL when it is switched off.
 */
void tracecmd_set_debug(bool set_debug)
{
	enum tep_loglevel level;

	level = set_debug ? TEP_LOG_DEBUG : TEP_LOG_CRITICAL;

	debug = set_debug;
	tracecmd_set_loglevel(level);
}

/**
 * tracecmd_get_debug - Query the debug mode of the tracecmd library
 *
 * Returns true when the library is in debug mode, false otherwise.
 */
bool tracecmd_get_debug(void)
{
	return debug;
}

/**
 * tracecmd_set_notimeout - Control whether waiting for responses may time out
 * @set_notimeout: True to disable the timeouts, false to keep them.
 *
 * With @set_notimeout set, the library does not fail while it waits for
 * responses, which helps when the code is run under gdb.
 * Debug mode already implies no timeouts, so this setting makes no
 * difference when debug is set.
 */
void tracecmd_set_notimeout(bool set_notimeout)
{
	notimeout = set_notimeout;
}

/**
 * tracecmd_get_notimeout - Get setting of notimeout of tracecmd library
 * Returns true, if the tracecmd library has notimeout set.
*
 */
bool tracecmd_get_notimeout(void)
{
	/* Debug mode implies that nothing times out */
	return notimeout || debug;
}

/*
 * Register every function found in a kallsyms style text buffer with @pevent.
 *
 * @file is modified in place: it is split into lines and the function and
 * module names are NUL terminated inside the buffer. Parsing stops at the
 * first line that does not have the "address type name" layout.
 */
void tracecmd_parse_proc_kallsyms(struct tep_handle *pevent,
				  char *file, unsigned int size __maybe_unused)
{
	unsigned long long addr;
	int sav_errno;
	char *func;
	char *line;
	char *next = NULL;
	char *mod;
	char ch;

	line = strtok_r(file, "\n", &next);
	while (line) {
		int func_start, func_end = 0;
		int mod_start, mod_end = 0;
		int n;

		mod = NULL;
		sav_errno = errno;
		errno = 0;
		/*
		 * Only the address and the type are converted (n == 2), the
		 * %n directives record where the function and the optional
		 * "[module]" start and end inside the line.
		 */
		n = sscanf(line, "%16llx %c %n%*s%n%*1[\t][%n%*s%n",
			   &addr, &ch, &func_start, &func_end, &mod_start, &mod_end);
		if (errno)
			return;
		errno = sav_errno;

		if (n != 2 || !func_end)
			return;

		func = line + func_start;
		/*
		 * Hacks for
		 *  - arm arch that adds a lot of bogus '$a' functions
		 *  - x86-64 that reports per-cpu variable offsets as absolute
		 */
		if (func[0] != '$' && ch != 'A' && ch != 'a') {
			line[func_end] = 0;
			if (mod_end) {
				mod = line + mod_start;
				/* truncate the extra ']' */
				line[mod_end - 1] = 0;
			}
			tep_register_function(pevent, func, addr, mod);
		}

		line = strtok_r(NULL, "\n", &next);
	}
}

/*
 * Register every "address : format" entry of a printk formats text buffer
 * with @pevent.
 *
 * @file is modified in place while it is split into lines. Entries without
 * a format after the ':' are reported and skipped.
 */
void tracecmd_parse_ftrace_printk(struct tep_handle *pevent,
				  char *file, unsigned int size __maybe_unused)
{
	unsigned long long addr;
	char *printk;
	char *line;
	char *next = NULL;
	char *addr_str;
	char *fmt;

	for (line = strtok_r(file, "\n", &next); line;
	     line = strtok_r(NULL, "\n", &next)) {
		fmt = NULL;
		addr_str = strtok_r(line, ":", &fmt);
		if (!addr_str) {
			tracecmd_warning("printk format with empty entry");
			break;
		}
		/*
		 * Without anything after the ':' fmt points at the end of the
		 * line, and fmt + 1 would be outside of it.
		 */
		if (!fmt || !*fmt) {
			tracecmd_warning("printk format entry without a format");
			continue;
		}
		addr = strtoull(addr_str, NULL, 16);
		/* fmt still has a space, skip it */
		printk = strdup(fmt+1);
		if (!printk) {
			tracecmd_warning("Failed to allocate printk format");
			break;
		}
		tep_register_print_string(pevent, printk, addr);
		free(printk);
	}
}

/**
 * tracecmd_add_id - add an int to the event id list
 * @list: list to add the id to
 * @id: id to add
 * @len: current length of list of ids.
 *
 * The typical usage is:
 *
 *    events = tracecmd_add_id(events, id, len++);
 *
 * Returns the new allocated list with the id included.
 * the list will contain a '-1' at the end.
 *
 * The returned list should be freed with free().
*/ int *tracecmd_add_id(int *list, int id, int len) { if (!list) list = malloc(sizeof(*list) * 2); else list = realloc(list, sizeof(*list) * (len + 2)); if (!list) return NULL; list[len++] = id; list[len] = -1; return list; } struct add_plugin_data { int ret; int index; char **files; }; static void add_plugin_file(struct tep_handle *pevent, const char *path, const char *name, void *data) { struct add_plugin_data *pdata = data; char **ptr; int size; int i; if (pdata->ret) return; size = pdata->index + 2; ptr = realloc(pdata->files, sizeof(char *) * size); if (!ptr) goto out_free; pdata->files = ptr; ptr[pdata->index] = strdup(name); if (!ptr[pdata->index]) goto out_free; pdata->index++; pdata->files[pdata->index] = NULL; return; out_free: for (i = 0; i < pdata->index; i++) free(pdata->files[i]); free(pdata->files); pdata->files = NULL; pdata->ret = errno; } /** * trace_util_find_plugin_files - find list of possible plugin files * @suffix: The suffix of the plugin files to find * * Searches the plugin directory for files that end in @suffix, and * will return an allocated array of file names, or NULL if none is * found. * * Must check against TRACECMD_ISERR(ret) as if an error happens * the errno will be returned with the TRACECMD_ERR_MSK to denote * such an error occurred. * * Use trace_util_free_plugin_files() to free the result. */ __hidden char **trace_util_find_plugin_files(const char *suffix) { struct add_plugin_data pdata; memset(&pdata, 0, sizeof(pdata)); tep_load_plugins_hook(NULL, suffix, add_plugin_file, &pdata); if (pdata.ret) return TRACECMD_ERROR(pdata.ret); return pdata.files; } /** * trace_util_free_plugin_files - free the result of trace_util_find_plugin_files() * @files: The result from trace_util_find_plugin_files() * * Frees the contents that were allocated by trace_util_find_plugin_files(). 
*/ void __hidden trace_util_free_plugin_files(char **files) { int i; if (!files || TRACECMD_ISERR(files)) return; for (i = 0; files[i]; i++) { free(files[i]); } free(files); } static char *get_source_plugins_dir(void) { char *p, path[PATH_MAX+1]; int ret; ret = readlink("/proc/self/exe", path, PATH_MAX); if (ret > PATH_MAX || ret < 0) return NULL; path[ret] = 0; dirname(path); p = strrchr(path, '/'); if (!p) return NULL; /* Check if we are in the the source tree */ if (strcmp(p, "/tracecmd") != 0) return NULL; strcpy(p, "/lib/traceevent/plugins"); return strdup(path); } __hidden struct tep_plugin_list * trace_load_plugins(struct tep_handle *tep, int flags) { struct tep_plugin_list *list; char *path; if (flags & TRACECMD_FL_LOAD_NO_PLUGINS) tep_set_flag(tep, TEP_DISABLE_PLUGINS); if (flags & TRACECMD_FL_LOAD_NO_SYSTEM_PLUGINS) tep_set_flag(tep, TEP_DISABLE_SYS_PLUGINS); path = get_source_plugins_dir(); if (path) tep_add_plugin_path(tep, path, TEP_PLUGIN_LAST); free(path); list = tep_load_plugins(tep); return list; } /** * tracecmd_set_loglevel - set log level of the library * @level: desired level of the library messages */ void tracecmd_set_loglevel(enum tep_loglevel level) { log_level = level; } void __weak tracecmd_warning(const char *fmt, ...) { va_list ap; if (log_level < TEP_LOG_WARNING) return; va_start(ap, fmt); tep_vprint("libtracecmd", TEP_LOG_WARNING, true, fmt, ap); va_end(ap); } void __weak tracecmd_info(const char *fmt, ...) { va_list ap; if (log_level < TEP_LOG_INFO) return; va_start(ap, fmt); tep_vprint("libtracecmd", TEP_LOG_INFO, false, fmt, ap); va_end(ap); } void __weak tracecmd_critical(const char *fmt, ...) { int ret; va_list ap; if (log_level < TEP_LOG_CRITICAL) return; va_start(ap, fmt); ret = tep_vprint("libtracecmd", TEP_LOG_CRITICAL, true, fmt, ap); va_end(ap); if (debug) { if (!ret) ret = -1; exit(ret); } } void __weak tracecmd_debug(const char *fmt, ...) 
{ va_list ap; if (!tracecmd_get_debug()) return; va_start(ap, fmt); vprintf(fmt, ap); va_end(ap); } #define LOG_BUF_SIZE 1024 static void __plog(const char *prefix, const char *fmt, va_list ap, FILE *fp) { char buf[LOG_BUF_SIZE]; int r; r = vsnprintf(buf, LOG_BUF_SIZE, fmt, ap); if (r > LOG_BUF_SIZE) r = LOG_BUF_SIZE; if (logfp) { fprintf(logfp, "[%d]%s%.*s", getpid(), prefix, r, buf); fflush(logfp); return; } fprintf(fp, "%.*s", r, buf); } void tracecmd_plog(const char *fmt, ...) { va_list ap; va_start(ap, fmt); __plog("", fmt, ap, stdout); va_end(ap); /* Make sure it gets to the screen, in case we crash afterward */ fflush(stdout); } void tracecmd_plog_error(const char *fmt, ...) { va_list ap; char *str = ""; va_start(ap, fmt); __plog("Error: ", fmt, ap, stderr); va_end(ap); if (errno) str = strerror(errno); if (logfp) fprintf(logfp, "\n%s\n", str); else fprintf(stderr, "\n%s\n", str); } /** * tracecmd_set_logfile - Set file for logging * @logfile: Name of the log file * * Returns 0 on successful completion or -1 in case of error */ int tracecmd_set_logfile(char *logfile) { if (logfp) fclose(logfp); logfp = fopen(logfile, "w"); if (!logfp) return -1; return 0; } /** * tracecmd_stack_tracer_status - Check stack trace status * @status: Returned stack trace status: * 0 - not configured, disabled * non 0 - enabled * * Returns -1 in case of an error, 0 if file does not exist * (stack tracer not configured in kernel) or 1 on successful completion. 
*/ int tracecmd_stack_tracer_status(int *status) { struct stat stat_buf; char buf[64]; long num; int fd; int n; if (stat(PROC_STACK_FILE, &stat_buf) < 0) { /* stack tracer not configured on running kernel */ *status = 0; /* not configured means disabled */ return 0; } fd = open(PROC_STACK_FILE, O_RDONLY); if (fd < 0) return -1; n = read(fd, buf, sizeof(buf)); close(fd); if (n <= 0) return -1; if (n >= sizeof(buf)) return -1; buf[n] = 0; errno = 0; num = strtol(buf, NULL, 10); /* Check for various possible errors */ if (num > INT_MAX || num < INT_MIN || (!num && errno)) return -1; *status = num; return 1; /* full success */ } /** * tracecmd_count_cpus - Get the number of CPUs in the system * * Returns the number of CPUs in the system, or 0 in case of an error */ int tracecmd_count_cpus(void) { static int once; char buf[1024]; int cpus = 0; char *pbuf; size_t *pn; FILE *fp; size_t n; int r; cpus = sysconf(_SC_NPROCESSORS_CONF); if (cpus > 0) return cpus; if (!once) { once++; tracecmd_warning("sysconf could not determine number of CPUS"); } /* Do the hack to figure out # of CPUS */ n = 1024; pn = &n; pbuf = buf; fp = fopen("/proc/cpuinfo", "r"); if (!fp) { tracecmd_critical("Can not read cpuinfo"); return 0; } while ((r = getline(&pbuf, pn, fp)) >= 0) { char *p; if (strncmp(buf, "processor", 9) != 0) continue; for (p = buf+9; isspace(*p); p++) ; if (*p == ':') cpus++; } fclose(fp); return cpus; } #define FNV_64_PRIME 0x100000001b3ULL /* * tracecmd_generate_traceid - Generate a unique ID, used to identify * the current tracing session * * Returns unique ID */ unsigned long long tracecmd_generate_traceid(void) { unsigned long long hash = 0; unsigned char *ustr; struct sysinfo sinfo; struct timespec ts; char *str = NULL; clock_gettime(CLOCK_MONOTONIC_RAW, &ts); sysinfo(&sinfo); asprintf(&str, "%ld %ld %ld %ld %ld %ld %ld %ld %d", ts.tv_sec, ts.tv_nsec, sinfo.loads[0], sinfo.loads[1], sinfo.loads[2], sinfo.freeram, sinfo.sharedram, sinfo.freeswap, sinfo.procs); if (!str) 
return 0; ustr = (unsigned char *)str; hash = 0; while (*ustr) { hash ^= (unsigned long long)*ustr++; hash *= FNV_64_PRIME; } free(str); return hash; } /* * tracecmd_default_file_version - Get default trace file version of the library * * Returns the default trace file version */ int tracecmd_default_file_version(void) { return FILE_VERSION_DEFAULT; } bool tracecmd_is_version_supported(unsigned int version) { if (version <= FILE_VERSION_MAX) return true; return false; } static void __attribute__ ((constructor)) tracecmd_lib_init(void) { tracecmd_compress_init(); } static void __attribute__((destructor)) tracecmd_lib_free(void) { tracecmd_compress_free(); } __hidden bool check_file_state(unsigned long file_version, int current_state, int new_state) { if (file_version >= FILE_VERSION_SECTIONS) { if (current_state < TRACECMD_FILE_INIT) return false; return true; } switch (new_state) { case TRACECMD_FILE_HEADERS: case TRACECMD_FILE_FTRACE_EVENTS: case TRACECMD_FILE_ALL_EVENTS: case TRACECMD_FILE_KALLSYMS: case TRACECMD_FILE_PRINTK: case TRACECMD_FILE_CMD_LINES: case TRACECMD_FILE_CPU_COUNT: if (current_state == (new_state - 1)) return true; break; case TRACECMD_FILE_OPTIONS: if (file_version < FILE_VERSION_SECTIONS && current_state == TRACECMD_FILE_CPU_COUNT) return true; break; case TRACECMD_FILE_CPU_LATENCY: case TRACECMD_FILE_CPU_FLYRECORD: if (current_state == TRACECMD_FILE_OPTIONS) return true; break; } return false; } trace-cmd-v3.3.1/libtracecmd.pc.template000066400000000000000000000005151470231550600201560ustar00rootroot00000000000000prefix=INSTALL_PREFIX libdir=LIB_DIR includedir=HEADER_DIR Name: libtracecmd URL: https://git.kernel.org/pub/scm/utils/trace-cmd/trace-cmd.git/ Description: Library for creating and reading trace-cmd data files Version: LIB_VERSION Requires: libtracefs >= LIBTRACEFS_MIN_VERSION Cflags: -I${includedir} Libs: -L${libdir} -ltracecmd 
trace-cmd-v3.3.1/make-trace-cmd.sh000077500000000000000000000022711470231550600166610ustar00rootroot00000000000000#!/bin/bash if [ -z "$INSTALL_PATH" ]; then echo echo 'Error: No $INSTALL_PATH defined' echo echo " usage: [PREFIX=prefix][BUILD_PATH=/path/to/build][CFLAGS=custom-cflags] INSTALL_PATH=/path/to/install make-trace-cmd.sh install|install_libs|clean|uninstall" echo echo " Used to create a self contained directory to copy to other machines." echo echo " Please read PACKAGING for more information." echo exit fi if [ ! -d $INSTALL_PATH ]; then mkdir $INSTALL_PATH fi if [ ! -z "$BUILD_PATH" ]; then if [ ! -d $BUILD_PATH ]; then mkdir $BUILD_PATH fi O_PATH="O=$BUILD_PATH" fi if [ -z "$PREFIX" ]; then PREFIX="/usr" fi PKG_PATH=`pkg-config --variable pc_path pkg-config | tr ":" " " | cut -d' ' -f1` WITH_PATH="" # If pkg-config supports --with-path, use that as well if pkg-config --with-path=/tmp --variable pc_path pkg-config &> /dev/null ; then WITH_PATH="--with-path=$INSTALL_PATH$PKG_PATH" fi if [ -z "$CFLAGS" ]; then CFLAGS="-g -Wall" fi PKG_CONFIG_PATH="$INSTALL_PATH/$PKG_PATH" PKG_CONFIG="pkg-config $WITH_PATH --define-variable=prefix=$INSTALL_PATH/$PREFIX" CFLAGS="-I$INSTALL_PATH/$PREFIX/include $CFLAGS" make DESTDIR=$INSTALL_PATH $O_PATH prefix=$PREFIX $@ trace-cmd-v3.3.1/meson-vcs-tag.sh000077500000000000000000000011451470231550600165710ustar00rootroot00000000000000#!/usr/bin/env bash # SPDX-License-Identifier: LGPL-2.1-or-later set -eu set -o pipefail dir="${1:?}" fallback="${2:?}" # Apparently git describe has a bug where it always considers the work-tree # dirty when invoked with --git-dir (even though 'git status' is happy). Work # around this issue by cd-ing to the source directory. cd "$dir" # Check that we have either .git/ (a normal clone) or a .git file (a work-tree) # and that we don't get confused if a tarball is extracted in a higher-level # git repository. 
[ -e .git ] && git describe --abbrev=7 --dirty=+ 2>/dev/null | sed 's/^v//' || echo "$fallback" trace-cmd-v3.3.1/meson.build000066400000000000000000000111071470231550600157100ustar00rootroot00000000000000# SPDX-License-Identifier: GPL-2.0 # # Copyright (c) 2023 Daniel Wagner, SUSE LLC project( 'trace-cmd', ['c'], meson_version: '>= 0.50.0', license: 'GPL-2.0', version: '3.3.1', default_options: [ 'c_std=gnu99', 'buildtype=debug', 'default_library=both', 'prefix=/usr/local', 'warning_level=1']) cc = meson.get_compiler('c') prefixdir = get_option('prefix') datadir = join_paths(prefixdir, get_option('datadir')) bindir = join_paths(prefixdir, get_option('bindir')) mandir = join_paths(prefixdir, get_option('mandir')) htmldir = join_paths(prefixdir, get_option('htmldir')) conf = configuration_data() libtraceevent_dep = dependency('libtraceevent', version: '>= 1.5.0', required: true) libtracefs_dep = dependency('libtracefs', version: '>= 1.8.0', required: true) threads_dep = dependency('threads', required: true) dl_dep = cc.find_library('dl', required : false) zlib_dep = dependency('zlib', required: false) conf.set('HAVE_ZLIB', zlib_dep.found(), description: 'Is zlib avialable?') libzstd_dep = dependency('libzstd', version: '>= 1.4.0', required: false) conf.set('HAVE_ZSTD', libzstd_dep.found(), description: 'Is libzstd available?') cunit_dep = dependency('cunit', required : false) vsock_defined = get_option('vsock') and cc.has_header('linux/vm_sockets.h') conf.set('VSOCK', vsock_defined, description: 'Is vsock available?') perf_defined = cc.has_header('linux/perf_event.h') conf.set('PERF', perf_defined, description: 'Is perf available?') have_ptrace = get_option('ptrace') and cc.compiles( ''' #include #include int main (void) { int ret; ret = ptrace(PTRACE_ATTACH, 0, NULL, 0); ptrace(PTRACE_TRACEME, 0, NULL, 0); ptrace(PTRACE_GETSIGINFO, 0, NULL, NULL); ptrace(PTRACE_GETEVENTMSG, 0, NULL, NULL); ptrace(PTRACE_SETOPTIONS, NULL, NULL, PTRACE_O_TRACEFORK | 
PTRACE_O_TRACEVFORK | PTRACE_O_TRACECLONE | PTRACE_O_TRACEEXIT); ptrace(PTRACE_CONT, NULL, NULL, 0); ptrace(PTRACE_DETACH, 0, NULL, NULL); ptrace(PTRACE_SETOPTIONS, 0, NULL, PTRACE_O_TRACEFORK | PTRACE_O_TRACEVFORK | PTRACE_O_TRACECLONE | PTRACE_O_TRACEEXIT); return ret; } ''', name: 'ptrace') if not have_ptrace conf.set10('NO_PTRACE', true, description: 'Is ptrace missing?') conf.set('WARN_NO_PTRACE', true, description: 'Issue no ptrace warning?') endif audit_dep = dependency('audit', required: false) if not audit_dep.found() conf.set10('NO_AUDIT', true, description: 'Is audit missing?') conf.set('WARN_NO_AUDIT', true, description: 'Issue no audit warning?') endif config_h = configure_file( output: 'config.h', configuration: conf ) version = meson.project_version().split('.') vconf = configuration_data() vconf.set('VERSION_CODE', version[0].to_int() * 256 + version[1].to_int()) vconf.set('EXTRAVERSION', '"@0@"'.format(version[2])) vconf.set('FILE_VERSION', '""') vconf.set('VERSION_STRING', '"@0@"'.format(meson.project_version())) version_tag = get_option('version-tag') if version_tag != '' vconf.set('VERSION_GIT', '"@0@"'.format(version_tag)) else r = run_command( 'meson-vcs-tag.sh', meson.current_source_dir(), meson.project_version(), check: true) vconf.set('VERSION_GIT', '"@0@"'.format(r.stdout().strip())) endif version_h = configure_file( output: 'tc_version.h', configuration: vconf) add_project_arguments( [ '-D_GNU_SOURCE', '-include', 'config.h', ], language : 'c') incdir = include_directories(['.', 'include']) # libtracecmd: trace-cmd currently depends on a statically linked # libtracecmd. libtracecmd is sill very strongly coupled with # trace-cmd (or the other way around). To reduce the development setup # complexity we add some of the 'top meson.build' from libtracecmd and # make it simpler to use. 
library_version = '1.5.1' libtracecmd_standalone_build = false libtracecmd_ext_incdir = include_directories( [ 'include', 'include/trace-cmd', 'tracecmd/include' ]) subdir('lib/trace-cmd/include') subdir('lib/trace-cmd/include/private') subdir('lib/trace-cmd') # trace-cmd subdir('tracecmd') subdir('python') if cunit_dep.found() subdir('utest') endif subdir('Documentation/trace-cmd') custom_target( 'docs', output: 'docs', depends: [html, man], command: ['echo']) trace-cmd-v3.3.1/meson_options.txt000066400000000000000000000022051470231550600172020ustar00rootroot00000000000000# -*- mode: meson -*- # SPDX-License-Identifier: GPL-2.0 option('version-tag', type : 'string', description : 'override the git version string') option('vsock', type : 'boolean', value : true, description : 'build with vsock support') option('ptrace', type : 'boolean', value : true, description : 'build with ptrace support') option('htmldir', type : 'string', value : 'share/doc/trace-cmd-doc', description : 'directory for HTML documentation') option('asciidoctor', type : 'boolean', value: false, description : 'use asciidoctor instead of asciidoc') option('docbook-xls-172', type : 'boolean', value : false, description : 'enable docbook XLS 172 workaround') option('asciidoc-no-roff', type : 'boolean', value : false, description : 'enable no roff workaround') option('man-bold-literal', type : 'boolean', value : false, description : 'enable bold literals') option('docbook-suppress-sp', type : 'boolean', value : false, description : 'docbook suppress sp') option('python', type : 'combo', choices : ['auto', 'true', 'false'], description : 'Generate trac-cmd Python bindings') trace-cmd-v3.3.1/python/000077500000000000000000000000001470231550600150675ustar00rootroot00000000000000trace-cmd-v3.3.1/python/Makefile000066400000000000000000000020171470231550600165270ustar00rootroot00000000000000# SPDX-License-Identifier: GPL-2.0 include $(src)/scripts/utils.mk ifdef BUILD_PYTHON_WORKS PYTHON_SO_INSTALL := 
ctracecmd.install PYTHON_PY_PROGS := event-viewer.install PYTHON_PY_LIBS := tracecmd.install endif ctracecmd.so: ctracecmd.i $(LIBTRACECMD_STATIC) swig -Wall -python -noproxy \ -I$(src)/include/trace-cmd -I$(src)/lib/trace-cmd/include/private \ $(LIBTRACEEVENT_CFLAGS) ctracecmd.i $(CC) -fpic -c $(CPPFLAGS) $(CFLAGS) $(PYTHON_INCLUDES) ctracecmd_wrap.c $(CC) --shared $(LIBTRACECMD_STATIC) $(LDFLAGS) $(LIBZSTD_LDLAGS) $(ZLIB_LDLAGS) \ ctracecmd_wrap.o -o ctracecmd.so $(TRACE_LIBS) $(PYTHON_SO_INSTALL): %.install : %.so force $(Q)$(call do_install_data,$<,$(python_dir_SQ)) $(PYTHON_PY_PROGS): %.install : %.py force $(Q)$(call do_install,$<,$(python_dir_SQ)) $(PYTHON_PY_LIBS): %.install : %.py force $(Q)$(call do_install_data,$<,$(python_dir_SQ)) install_python: $(PYTHON_SO_INSTALL) $(PYTHON_PY_PROGS) $(PYTHON_PY_LIBS) clean: $(RM) *.a *.so *.o .*.d ctracecmd_wrap.* force: .PHONY: clean force trace-cmd-v3.3.1/python/ctracecmd.i000066400000000000000000000127511470231550600171740ustar00rootroot00000000000000// tracecmd.i %module ctracecmd %include "typemaps.i" %include "constraints.i" %nodefaultctor record; %nodefaultdtor record; %apply Pointer NONNULL { struct tracecmd_input *handle }; %apply Pointer NONNULL { struct tep_handle *pevent }; %apply Pointer NONNULL { struct tep_format_field * }; %apply unsigned long long *OUTPUT {unsigned long long *} %apply int *OUTPUT {int *} %{ #include "trace-cmd.h" #include "trace-cmd-private-python.h" #include "event-parse.h" #include "event-utils.h" #include %} %typemap(in) PyObject *pyfunc { if (!PyCallable_Check($input)) { PyErr_SetString(PyExc_TypeError, "Need a callable object!"); return NULL; } $1 = $input; } %ignore python_callback; %inline %{ static int python_callback(struct trace_seq *s, struct tep_record *record, struct tep_event *event, void *context); static int skip_output = 0; static void py_supress_trace_output(void) { skip_output = 1; } void warning(const char *fmt, ...) 
{ va_list ap; if (skip_output) return; va_start(ap, fmt); tep_vprint("tracecmd", TEP_LOG_WARNING, true, fmt, ap); va_end(ap); } PyObject *convert_pevent(unsigned long pevent) { void *pev = (void *)pevent; return SWIG_NewPointerObj(SWIG_as_voidptr(pev), SWIGTYPE_p_tep_handle, 0); } void py_pevent_register_event_handler(struct tep_handle *pevent, int id, char *subsys, char *evname, PyObject *pyfunc) { Py_INCREF(pyfunc); tep_register_event_handler(pevent, id, subsys, evname, python_callback, pyfunc); } static PyObject *py_field_get_stack(struct tep_handle *pevent, struct tep_record *record, struct tep_event *event, int long_size) { PyObject *list; struct tep_format_field *field; void *data = record->data; const char *func = NULL; unsigned long addr; field = tep_find_any_field(event, "caller"); if (!field) { PyErr_SetString(PyExc_TypeError, "Event doesn't have caller field"); return NULL; } list = PyList_New(0); for (data += field->offset; data < record->data + record->size; data += long_size) { addr = tep_read_number(event->tep, data, long_size); if ((long_size == 8 && addr == (unsigned long long)-1) || ((int)addr == -1)) break; func = tep_find_function(event->tep, addr); if (PyList_Append(list, PyUnicode_FromString(func))) { Py_DECREF(list); return NULL; } } return list; } #if PY_MAJOR_VERSION >= 3 static PyObject *fromMemory(void *buf, size_t len) { return PyMemoryView_FromMemory(buf, len, PyBUF_READ); } #define PY_INT_AS_LONG PyLong_AsLong #else static PyObject *fromMemory(void *buf, size_t len) { return PyBuffer_FromMemory(buf, len); } #define PY_INT_AS_LONG PyInt_AS_LONG #endif static PyObject *py_field_get_data(struct tep_format_field *f, struct tep_record *r) { if (!strncmp(f->type, "__data_loc ", 11)) { unsigned long long val; int len, offset; if (tep_read_number_field(f, r->data, &val)) { PyErr_SetString(PyExc_TypeError, "Field is not a valid number"); return NULL; } /* * The actual length of the dynamic array is stored * in the top half of the field, and the 
offset * is in the bottom half of the 32 bit field. */ offset = val & 0xffff; len = val >> 16; return fromMemory(r->data + offset, len); } return fromMemory(r->data + f->offset, f->size); } static PyObject *py_field_get_str(struct tep_format_field *f, struct tep_record *r) { if (!strncmp(f->type, "__data_loc ", 11)) { unsigned long long val; int offset; if (tep_read_number_field(f, r->data, &val)) { PyErr_SetString(PyExc_TypeError, "Field is not a valid number"); return NULL; } /* * The actual length of the dynamic array is stored * in the top half of the field, and the offset * is in the bottom half of the 32 bit field. */ offset = val & 0xffff; return PyUnicode_FromString((char *)r->data + offset); } return PyUnicode_FromStringAndSize((char *)r->data + f->offset, strnlen((char *)r->data + f->offset, f->size)); } static PyObject *py_format_get_keys(struct tep_event *ef, bool common_keys) { PyObject *list; struct tep_format_field *f; list = PyList_New(0); for (f = common_keys ? ef->format.common_fields : ef->format.fields; f; f = f->next) { if (PyList_Append(list, PyUnicode_FromString(f->name))) { Py_DECREF(list); return NULL; } } return list; } %} %wrapper %{ static int python_callback(struct trace_seq *s, struct tep_record *record, struct tep_event *event, void *context) { PyObject *arglist, *result; int r = 0; record->ref_count++; arglist = Py_BuildValue("(OOO)", SWIG_NewPointerObj(SWIG_as_voidptr(s), SWIGTYPE_p_trace_seq, 0), SWIG_NewPointerObj(SWIG_as_voidptr(record), SWIGTYPE_p_tep_record, 0), SWIG_NewPointerObj(SWIG_as_voidptr(event), SWIGTYPE_p_tep_event, 0)); result = PyObject_Call(context, arglist, NULL); Py_XDECREF(arglist); if (result && result != Py_None) { if (!PyInt_Check(result)) { PyErr_SetString(PyExc_TypeError, "callback must return int"); PyErr_Print(); Py_XDECREF(result); return 0; } r = PY_INT_AS_LONG(result); } else if (result == Py_None) r = 0; else PyErr_Print(); Py_XDECREF(result); return r; } %} %ignore trace_seq_vprintf; %ignore 
vpr_stat; %ignore tep_plugin_kvm_get_func; %ignore tep_plugin_kvm_put_func; /* SWIG can't grok these, define them to nothing */ #define __trace #define __attribute__(x) #define __thread %include "trace-cmd.h" %include "trace-cmd-private-python.h" %include %include trace-cmd-v3.3.1/python/event-viewer.py000077500000000000000000000173541470231550600200760ustar00rootroot00000000000000#!/usr/bin/env python2 import getopt from gobject import * import gtk from tracecmd import * import time app = None data_func_cnt = 0 # In a "real" app these width should be determined at runtime testing max length # strings in the current font. TS_COL_W = 150 CPU_COL_W = 35 EVENT_COL_W = 150 PID_COL_W = 75 COMM_COL_W = 250 def timing(func): def wrapper(*arg): start = time.time() ret = func(*arg) end = time.time() print('@%s took %0.3f s' % (func.func_name, (end-start))) return ret return wrapper class EventStore(gtk.GenericTreeModel): class EventRef(object): '''Inner class to build the trace event index''' def __init__(self, index, timestamp, offset, cpu): self.index = index self.offset = offset self.ts = timestamp self.cpu = cpu def __cmp__(self, other): if self.ts < other.ts: return -1 if self.ts > other.ts: return 1 if self.offset < other.offset: return -1 if self.offset > other.offset: return 1 return 0 # The store only returns the record offset into the trace # The view is responsible for looking up the Event with the offset column_types = (long,) @timing def __init__(self, trace): gtk.GenericTreeModel.__init__(self) self.trace = trace self.refs = [] self._load_trace() self._sort() self._reindex() @timing def _load_trace(self): print("Building trace index...") index = 0 for cpu in range(0, trace.cpus): rec = tracecmd_read_data(self.trace._handle, cpu) while rec: offset = tep_record_offset_get(rec) ts = tep_record_ts_get(rec) self.refs.append(self.EventRef(index, ts, offset, cpu)) index = index + 1 rec = tracecmd_read_data(self.trace._handle, cpu) print("Loaded %d events from trace" 
% (index)) @timing def _sort(self): self.refs.sort() @timing def _reindex(self): for i in range(0, len(self.refs)): self.refs[i].index = i def on_get_flags(self): return gtk.TREE_MODEL_LIST_ONLY | gtk.TREE_MODEL_ITERS_PERSIST def on_get_n_columns(self): return len(self.column_types) def on_get_column_type(self, col): return self.column_types[col] def on_get_iter(self, path): return self.refs[path[0]] def on_get_path(self, ref): return ref.index def on_get_value(self, ref, col): ''' The Event record was getting deleted when passed back via this method, now it just returns the ref itself. Use get_event() instead. ''' if col == 0: #return self.trace.read_event_at(ref.offset) return ref return None def on_iter_next(self, ref): try: return self.refs[ref.index+1] except IndexError: return None def on_iter_children(self, ref): if ref: return None return self.refs[0] def on_iter_has_child(self, ref): return False def on_iter_n_children(self, ref): if ref: return 0 return len(self.refs) def on_iter_nth_child(self, ref, n): if ref: return None try: return self.refs[n] except IndexError: return None def on_iter_parent(self, child): return None def get_event(self, iter): '''This allocates a record which must be freed by the caller''' try: ref = self.refs[self.get_path(iter)[0]] ev = self.trace.read_event_at(ref.offset) return ev except IndexError: return None class EventView(gtk.TreeView): def __init__(self, model): gtk.TreeView.__init__(self, model) self.set_fixed_height_mode(True) ts_col = gtk.TreeViewColumn("Time (s)") ts_col.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED) ts_col.set_fixed_width(TS_COL_W) ts_cell = gtk.CellRendererText() ts_col.pack_start(ts_cell, False) ts_col.set_cell_data_func(ts_cell, self.data_func, "ts") self.append_column(ts_col) cpu_col = gtk.TreeViewColumn("CPU") cpu_col.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED) cpu_col.set_fixed_width(CPU_COL_W) cpu_cell = gtk.CellRendererText() cpu_col.pack_start(cpu_cell, False) cpu_col.set_cell_data_func(cpu_cell, 
self.data_func, "cpu") self.append_column(cpu_col) event_col = gtk.TreeViewColumn("Event") event_col.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED) event_col.set_fixed_width(EVENT_COL_W) event_cell = gtk.CellRendererText() event_col.pack_start(event_cell, False) event_col.set_cell_data_func(event_cell, self.data_func, "event") self.append_column(event_col) pid_col = gtk.TreeViewColumn("PID") pid_col.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED) pid_col.set_fixed_width(PID_COL_W) pid_cell = gtk.CellRendererText() pid_col.pack_start(pid_cell, False) pid_col.set_cell_data_func(pid_cell, self.data_func, "pid") self.append_column(pid_col) comm_col = gtk.TreeViewColumn("Comm") comm_col.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED) comm_col.set_fixed_width(COMM_COL_W) comm_cell = gtk.CellRendererText() comm_col.pack_start(comm_cell, False) comm_col.set_cell_data_func(comm_cell, self.data_func, "comm") self.append_column(comm_col) def data_func(self, col, cell, model, iter, data): global app, data_func_cnt ev = model.get_event(iter) #ev = model.get_value(iter, 0) if not ev: return False if data == "ts": cell.set_property("markup", "%d.%09d" % (ev.ts/1000000000, ev.ts%1000000000)) data_func_cnt = data_func_cnt + 1 if app: app.inc_data_func() elif data == "cpu": cell.set_property("markup", ev.cpu) elif data == "event": cell.set_property("markup", ev.name) elif data == "pid": cell.set_property("markup", ev.pid) elif data == "comm": cell.set_property("markup", ev.comm) else: print("Unknown Column:", data) return False return True class EventViewerApp(gtk.Window): def __init__(self, trace): gtk.Window.__init__(self) self.set_size_request(650, 400) self.set_position(gtk.WIN_POS_CENTER) self.connect("destroy", gtk.main_quit) self.set_title("Event Viewer") store = EventStore(trace) view = EventView(store) sw = gtk.ScrolledWindow() sw.set_policy(gtk.POLICY_NEVER, gtk.POLICY_ALWAYS) sw.add(view) # track how often the treeview data_func is called self.data_func_label = gtk.Label("0") hbox = gtk.HBox() 
hbox.pack_start(gtk.Label("TS Data Func Calls:"), False, False) hbox.pack_start(self.data_func_label, False, False) vbox = gtk.VBox() vbox.pack_start(hbox, False) vbox.pack_end(sw) self.add(vbox) self.show_all() def inc_data_func(self): global data_func_cnt self.data_func_label.set_text(str(data_func_cnt)) if __name__ == "__main__": if len(sys.argv) >=2: filename = sys.argv[1] else: filename = "trace.dat" print("Initializing trace...") trace = Trace(filename) print("Initializing app...") app = EventViewerApp(trace) print("Go!") gtk.main() trace-cmd-v3.3.1/python/meson.build000066400000000000000000000030151470231550600172300ustar00rootroot00000000000000# SPDX-License-Identifier: GPL-2.0 # # Copyright (c) 2023 Daniel Wagner, SUSE LLC want_python = get_option('python') if want_python != 'false' python3 = import('python').find_installation('python3') py3_dep = python3.dependency(required: want_python == 'true') swig = find_program('swig', required: want_python == 'true') header_found = cc.has_header('Python.h', dependencies: py3_dep) have_python_support = py3_dep.found() and swig.found() and header_found else have_python_support = false endif if have_python_support pymod_swig = custom_target( 'ctracecmd.py', input: ['ctracecmd.i'], output: ['ctracecmd.py', 'ctracecmd_wrap.c'], command: [ swig, '-python', '-I' + meson.current_source_dir() + '/../include/trace-cmd', '-I' + meson.current_source_dir() + '/../lib/trace-cmd/include/private', '-I' + libtraceevent_dep.get_pkgconfig_variable('prefix') + '/include/traceevent', '-o', '@OUTPUT1@', '@INPUT0@'], install: true, install_dir: [ python3.get_install_dir(pure: false, subdir: 'trace-cmd'), false]) incdir_py = include_directories(['.', '../include/trace-cmd', '../lib/trace-cmd/include/private']) pyctracecmd_clib = python3.extension_module( '_ctracecmd', pymod_swig[1], dependencies : [libtraceevent_dep, libtracefs_dep, py3_dep], include_directories: [incdir, incdir_py], install: true, subdir: 'trace-cmd') endif 
trace-cmd-v3.3.1/python/tracecmd.py000066400000000000000000000172041470231550600172270ustar00rootroot00000000000000# # Copyright (C) International Business Machines Corp., 2009 # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # # 2009-Dec-17: Initial version by Darren Hart # from functools import cached_property from collections.abc import Mapping from itertools import chain from ctracecmd import * """ Python interface to the tracecmd library for parsing ftrace traces Python tracecmd applications should be written to this interface. It will be updated as the tracecmd C API changes and try to minimze the impact to python applications. The ctracecmd Python module is automatically generated using SWIG and it is recommended applications not use it directly. TODO: consider a complete class hierarchy of ftrace events... """ class Event(Mapping): """ This class can be used to access event data according to an event's record and format. 
""" def __init__(self, pevent, record, format): self._pevent = pevent self._record = record self._format = format def __str__(self): return "%d.%09d CPU%d %s: pid=%d comm=%s type=%d" % \ (self.ts/1000000000, self.ts%1000000000, self.cpu, self.name, self.num_field("common_pid"), self.comm, self.type) def __del__(self): tracecmd_free_record(self._record) def __getitem__(self, n): if n.startswith('common_'): f = tep_find_common_field(self._format, n) else: f = tep_find_field(self._format, n) if f is None: raise KeyError("no field '%s'" % n) return Field(self._record, f) def __iter__(self): yield from chain(self.common_keys, self.keys) def __len__(self): return len(self.common_keys) + len(self.keys) @cached_property def common_keys(self): return py_format_get_keys(self._format, True) @cached_property def keys(self): return py_format_get_keys(self._format, False) @cached_property def comm(self): return tep_data_comm_from_pid(self._pevent, self.pid) @cached_property def cpu(self): return tep_record_cpu_get(self._record) @cached_property def name(self): return tep_event_name_get(self._format) @cached_property def pid(self): return tep_data_pid(self._pevent, self._record) @cached_property def ts(self): return tep_record_ts_get(self._record) @cached_property def type(self): return tep_data_type(self._pevent, self._record) def num_field(self, name): f = tep_find_any_field(self._format, name) if f is None: return None ret, val = tep_read_number_field(f, tep_record_data_get(self._record)) if ret: return None return val def str_field(self, name): f = tep_find_any_field(self._format, name) if f is None: return None return py_field_get_str(f, self._record) def stack_field(self, long_size): return py_field_get_stack(self._pevent, self._record, self._format, long_size) class TraceSeq(object): def __init__(self, trace_seq): self._trace_seq = trace_seq def puts(self, s): return trace_seq_puts(self._trace_seq, s) class FieldError(Exception): pass class Field(object): def 
__init__(self, record, field): self._record = record self._field = field @cached_property def data(self): return py_field_get_data(self._field, self._record) def __long__(self): ret, val = tep_read_number_field(self._field, tep_record_data_get(self._record)) if ret: raise FieldError("Not a number field") return val __int__ = __long__ def __str__(self): return py_field_get_str(self._field, self._record) class PEvent(object): def __init__(self, pevent): self._pevent = pevent def _handler(self, cb, s, record, event_fmt): return cb(TraceSeq(s), Event(self._pevent, record, event_fmt)) def register_event_handler(self, subsys, event_name, callback): l = lambda s, r, e: self._handler(callback, s, r, e) py_pevent_register_event_handler( self._pevent, -1, subsys, event_name, l) @cached_property def file_endian(self): if tep_is_file_bigendian(self._pevent): return '>' return '<' class FileFormatError(Exception): pass class Trace(object): """ Trace object represents the trace file it is created with. The Trace object aggregates the tracecmd structures and functions that are used to manage the trace and extract events from it. 
""" def __init__(self, filename): self._handle = tracecmd_open(filename, 0) self._pevent = tracecmd_get_tep(self._handle) @cached_property def cpus(self): return tracecmd_cpus(self._handle) @cached_property def long_size(self): return tracecmd_long_size(self._handle) def read_event(self, cpu): rec = tracecmd_read_data(self._handle, cpu) if rec: type = tep_data_type(self._pevent, rec) format = tep_find_event(self._pevent, type) # rec ownership goes over to Event instance return Event(self._pevent, rec, format) return None def read_event_at(self, offset): res = tracecmd_read_at(self._handle, offset) # SWIG only returns the CPU if the record is None for some reason if isinstance(res, int): return None rec, cpu = res type = tep_data_type(self._pevent, rec) format = tep_find_event(self._pevent, type) # rec ownership goes over to Event instance return Event(self._pevent, rec, format) def read_next_event(self): res = tracecmd_read_next_data(self._handle) if isinstance(res, int): return None rec, cpu = res type = tep_data_type(self._pevent, rec) format = tep_find_event(self._pevent, type) return Event(self._pevent, rec, format) def peek_event(self, cpu): rec = tracecmd_peek_data_ref(self._handle, cpu) if rec is None: return None type = tep_data_type(self._pevent, rec) format = tep_find_event(self._pevent, type) # rec ownership goes over to Event instance return Event(self._pevent, rec, format) # Basic builtin test, execute module directly if __name__ == "__main__": t = Trace("trace.dat") print(f"Trace contains data for {t.cpus} cpus, long has {t.long_size} bytes") print("Peek the first event on CPU0") print("\t%s" % (t.peek_event(0))) print("Events by CPUs") for cpu in range(0, t.cpus): print("CPU %d" % (cpu)) ev = t.read_event(cpu) while ev: print("\t%s" % (ev)) ev = t.read_event(cpu) t = Trace("trace.dat") print("Events by time") ev = t.read_next_event() while ev: print("\t%s" % (ev)) ev = t.read_next_event() 
trace-cmd-v3.3.1/python/tracecmdgui.py000066400000000000000000000173241470231550600177370ustar00rootroot00000000000000# # Copyright (C) International Business Machines Corp., 2009 # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # # 2009-Dec-31: Initial version by Darren Hart # import gobject #delete me ? import time import sys import gtk from tracecmd import * from ctracecmdgui import * """ Python interface for tracecmd GTK widgets Python tracecmd applications should be written to this interface. It will be updated as the tracecmd gui C API changes and try to minimze the impact to python applications. The ctracecmdgui Python module is automatically generated using SWIG and it is recommended applications not use it directly. """ # In a "real" app these width should be determined at runtime testing max length # strings in the current font. TS_COL_W = 150 CPU_COL_W = 35 EVENT_COL_W = 150 PID_COL_W = 75 COMM_COL_W = 250 def timing(func): def wrapper(*arg): start = time.time() ret = func(*arg) end = time.time() print('@%s took %0.3f s' % (func.func_name, (end-start))) return ret return wrapper class EventStore(gtk.GenericTreeModel): # FIXME: get these from the C code: trace_view_store->column_types ... 
@timing def __init__(self, trace): gtk.GenericTreeModel.__init__(self) self.trace = trace self.cstore = trace_view_store_new(trace.handle) self.gtk_cstore = trace_view_store_as_gtk_tree_model(self.cstore) num_rows = trace_view_store_num_rows_get(self.cstore) print("Loaded %d events from trace" % (num_rows)) def on_get_flags(self): return trace_view_store_get_flags(self.gtk_cstore) def on_get_n_columns(self): return trace_view_store_get_n_columns(self.gtk_cstore) def on_get_column_type(self, col): # I couldn't figure out how to convert the C GType into the python # GType. The current typemap converts the C GType into the python type, # which is what this function is supposed to return anyway. pytype = trace_view_store_get_column_type(self.gtk_cstore, col) return pytype def on_get_iter(self, path): if len(path) > 1 and path[1] != 1: return None n = path[0] rec = trace_view_store_get_row(self.cstore, n) return rec def on_get_path(self, rec): if not rec: return None start_row = trace_view_store_start_row_get(self.cstore) return (trace_view_record_pos_get(rec) - start_row,) def on_get_value(self, rec, col): # FIXME: write SWIG wrapper to marshal the Gvalue and wrap the rec in an # Iter pass #return trace_view_store_get_value_py(self.cstore, rec, col) def on_iter_next(self, rec): pos = trace_view_record_pos_get(rec) start_row = trace_view_store_start_row_get(self.cstore) return trace_view_store_get_row(self.cstore, pos - start_row + 1) def on_iter_children(self, rec): if rec: return None return trace_view_store_get_row(self.cstore, 0) def on_iter_has_child(self, rec): return False def on_iter_n_children(self, rec): if rec: return 0 return trace_view_store_num_rows_get(self.cstore) def on_iter_nth_child(self, rec, n): if rec: return None return trace_view_store_get_row(self.cstore, n) def on_iter_parent(self, child): return None def get_event(self, iter): path = self.get_path(iter) if not path: return None rec = trace_view_store_get_row(self.cstore, path[0]) if not rec: 
return None ev = self.trace.read_event_at(trace_view_record_offset_get(rec)) return ev class EventView(gtk.TreeView): def __init__(self, model): gtk.TreeView.__init__(self, model) self.set_fixed_height_mode(True) ts_col = gtk.TreeViewColumn("Time (s)") ts_col.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED) ts_col.set_fixed_width(TS_COL_W) ts_cell = gtk.CellRendererText() ts_col.pack_start(ts_cell, False) ts_col.set_cell_data_func(ts_cell, self.data_func, "ts") self.append_column(ts_col) cpu_col = gtk.TreeViewColumn("CPU") cpu_col.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED) cpu_col.set_fixed_width(CPU_COL_W) cpu_cell = gtk.CellRendererText() cpu_col.pack_start(cpu_cell, False) cpu_col.set_cell_data_func(cpu_cell, self.data_func, "cpu") self.append_column(cpu_col) event_col = gtk.TreeViewColumn("Event") event_col.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED) event_col.set_fixed_width(EVENT_COL_W) event_cell = gtk.CellRendererText() event_col.pack_start(event_cell, False) event_col.set_cell_data_func(event_cell, self.data_func, "event") self.append_column(event_col) pid_col = gtk.TreeViewColumn("PID") pid_col.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED) pid_col.set_fixed_width(PID_COL_W) pid_cell = gtk.CellRendererText() pid_col.pack_start(pid_cell, False) pid_col.set_cell_data_func(pid_cell, self.data_func, "pid") self.append_column(pid_col) comm_col = gtk.TreeViewColumn("Comm") comm_col.set_sizing(gtk.TREE_VIEW_COLUMN_FIXED) comm_col.set_fixed_width(COMM_COL_W) comm_cell = gtk.CellRendererText() comm_col.pack_start(comm_cell, False) comm_col.set_cell_data_func(comm_cell, self.data_func, "comm") self.append_column(comm_col) def data_func(self, col, cell, model, iter, data): ev = model.get_event(iter) #ev = model.get_value(iter, 0) if not ev: return False if data == "ts": cell.set_property("markup", "%d.%d" % (ev.ts/1000000000, ev.ts%1000000000)) elif data == "cpu": cell.set_property("markup", ev.cpu) elif data == "event": cell.set_property("markup", ev.name) elif data == "pid": 
cell.set_property("markup", ev.pid) elif data == "comm": cell.set_property("markup", ev.comm) else: print("Unknown Column:", data) return False return True class EventViewerApp(gtk.Window): def __init__(self, trace): gtk.Window.__init__(self) self.set_size_request(650, 400) self.set_position(gtk.WIN_POS_CENTER) self.connect("destroy", gtk.main_quit) self.set_title("Event Viewer") store = EventStore(trace) view = EventView(store) sw = gtk.ScrolledWindow() sw.set_policy(gtk.POLICY_NEVER, gtk.POLICY_ALWAYS) sw.add(view) # track how often the treeview data_func is called self.add(sw) self.show_all() # Basic builtin test, execute module directly if __name__ == "__main__": if len(sys.argv) >=2: filename = sys.argv[1] else: filename = "trace.dat" print("Initializing trace...") trace = Trace(filename) print("Initializing app...") app = EventViewerApp(trace) print("Go!") gtk.main() trace-cmd-v3.3.1/scripts/000077500000000000000000000000001470231550600152355ustar00rootroot00000000000000trace-cmd-v3.3.1/scripts/debug/000077500000000000000000000000001470231550600163235ustar00rootroot00000000000000trace-cmd-v3.3.1/scripts/debug/tsync_hist.py000066400000000000000000000025361470231550600210720ustar00rootroot00000000000000# SPDX-License-Identifier: GPL-2.0 # # Copyright (C) 2019, VMware Inc, Tzvetomir Stoyanov # Copyright (C) 2019, VMware Inc, Yordan Karadzhov import matplotlib.pyplot as plt import matplotlib.lines as mlines import numpy as np import sys def newline(p1, p2): ax = plt.gca() xmin, xmax = ax.get_xbound() if(p2[0] == p1[0]): xmin = xmax = p1[0] ymin, ymax = ax.get_ybound() else: ymax = p1[1]+(p2[1]-p1[1])/(p2[0]-p1[0])*(xmax-p1[0]) ymin = p1[1]+(p2[1]-p1[1])/(p2[0]-p1[0])*(xmin-p1[0]) l = mlines.Line2D([xmin,xmax], [ymin,ymax], color='red') ax.add_line(l) return l data = np.loadtxt(fname = sys.argv[1]) selected_ts = data[-1, 1] selected_ofs = data[-1, 0] data = data[:-1,:] x = data[:, 1] - data[:, 0] mean = x.mean() std = x.std() num_bins = 500 min = x.min() #+ .4 * 
(x.max() - x.min()) max = x.max() #- .4 * (x.max() - x.min()) bins = np.linspace(min, max, num_bins, endpoint = False, dtype=int) fig, ax = plt.subplots() # the histogram of the data n, bins, patches = ax.hist(x, bins, histtype=u'step'); ax.set_xlabel('clock offset [$\mu$s]') ax.set_ylabel('entries') ax.set_title("$\sigma$=%i" % std) x1, y1 = [selected_ofs, min], [selected_ofs, max] newline(x1, y1) # Tweak spacing to prevent clipping of ylabel fig.tight_layout() plt.show() trace-cmd-v3.3.1/scripts/debug/tsync_readme000066400000000000000000000011241470231550600207210ustar00rootroot00000000000000PTP-like algorithm debug ======================== tsync_*.py scripts can be used to visualise debug files, written when the PTP-like algorithm is compiled with TSYNC_DEBUG defined. The files are located in the guest machine: s-cid*.txt - For each offset calculation: host and guest clocks and calculated offset. res-cid*.txt - For each tracing session: all calculated clock offsets. tsync_hist.py plots a histogram, using data from a s-cid*.txt file: "python tsync_hist.py s-cid2_1.txt" tsync_res.py plots a line, using data from res-cid*.txt file: "python tsync_res.py res-cid2.txt" trace-cmd-v3.3.1/scripts/debug/tsync_res.py000066400000000000000000000021141470231550600207040ustar00rootroot00000000000000# SPDX-License-Identifier: GPL-2.0 # # Copyright (C) 2019, VMware Inc, Tzvetomir Stoyanov # Copyright (C) 2019, VMware Inc, Yordan Karadzhov import matplotlib.pyplot as plt import matplotlib.lines as mlines import numpy as np import sys def newline(p1, p2): ax = plt.gca() xmin, xmax = ax.get_xbound() if(p2[0] == p1[0]): xmin = xmax = p1[0] ymin, ymax = ax.get_ybound() else: ymax = p1[1]+(p2[1]-p1[1])/(p2[0]-p1[0])*(xmax-p1[0]) ymin = p1[1]+(p2[1]-p1[1])/(p2[0]-p1[0])*(xmin-p1[0]) l = mlines.Line2D([xmin,xmax], [ymin,ymax], color='red') ax.add_line(l) return l data = np.loadtxt(fname = sys.argv[1]) x = data[:, 0] y = data[:, 1] fig, ax = plt.subplots() ax.set_xlabel('samples (t)') 
ax.set_ylabel('clock offset') ax.set_title("$\delta$=%i ns" % (max(y) - min(y))) l = mlines.Line2D(x, y) ax.add_line(l) ax.set_xlim(min(x), max(x)) ax.set_ylim(min(y), max(y) ) print(min(y), max(y), max(y) - min(y)) # Tweak spacing to prevent clipping of ylabel fig.tight_layout() plt.show() trace-cmd-v3.3.1/scripts/utils.mk000066400000000000000000000135171470231550600167350ustar00rootroot00000000000000# SPDX-License-Identifier: GPL-2.0 # Utils ifeq ($(BUILDGUI), 1) GUI = 'GUI ' GSPACE = else GUI = GSPACE = " " endif GOBJ = $(GSPACE)$(notdir $(strip $@)) ifeq ($(VERBOSE),1) Q = S = else Q = @ S = -s endif # Use empty print_* macros if either SILENT or VERBOSE. ifeq ($(findstring 1,$(SILENT)$(VERBOSE)),1) print_compile = print_app_build = print_fpic_compile = print_shared_lib_compile = print_plugin_obj_compile = print_plugin_build = print_install = print_uninstall = print_update = print_asciidoc = print_xsltproc = print_install = hide_xsltproc_output = else print_compile = echo ' $(GUI)COMPILE '$(GOBJ); print_app_build = echo ' $(GUI)BUILD '$(GOBJ); print_fpic_compile = echo ' $(GUI)COMPILE FPIC '$(GOBJ); print_shared_lib_compile = echo ' $(GUI)COMPILE SHARED LIB '$(GOBJ); print_plugin_obj_compile = echo ' $(GUI)COMPILE PLUGIN OBJ '$(GOBJ); print_plugin_build = echo ' $(GUI)BUILD PLUGIN '$(GOBJ); print_static_lib_build = echo ' $(GUI)BUILD STATIC LIB '$(GOBJ); print_install = echo ' $(GUI)INSTALL '$(GSPACE)$1' to $(DESTDIR_SQ)$2'; print_update = echo ' $(GUI)UPDATE '$(GOBJ); print_uninstall = echo ' $(GUI)UNINSTALLING $(DESTDIR_SQ)$1'; print_asciidoc = echo ' ASCIIDOC '`basename $@`; print_xsltproc = echo ' XSLTPROC '`basename $@`; print_install = echo ' INSTALL '`basename $1`' to $(DESTDIR_SQ)'$2; hide_xsltproc_output = 2> /dev/null endif do_fpic_compile = \ ($(print_fpic_compile) \ $(CC) -c $(CPPFLAGS) $(CFLAGS) $(EXT) -fPIC $< -o $@) do_compile = \ ($(if $(GENERATE_PIC), $(do_fpic_compile), \ $(print_compile) \ $(CC) -c $(CPPFLAGS) $(CFLAGS) $(EXT) $< -o $@)) 
do_app_build = \ ($(print_app_build) \ $(CC) $^ -rdynamic -Wl,-rpath=$(libdir) -o $@ $(LDFLAGS) $(CONFIG_LIBS) $(LIBS)) do_build_static_lib = \ ($(print_static_lib_build) \ $(RM) $@; $(AR) rcs $@ $^) do_compile_shared_library = \ ($(print_shared_lib_compile) \ $(CC) --shared $^ '-Wl,-soname,$(1),-rpath=$$ORIGIN' -o $@ $(LDFLAGS) $(LIBS)) do_compile_plugin_obj = \ ($(print_plugin_obj_compile) \ $(CC) -c $(CPPFLAGS) $(CFLAGS) -fPIC -o $@ $<) do_plugin_build = \ ($(print_plugin_build) \ $(CC) $(CFLAGS) $(LDFLAGS) -shared -nostartfiles -o $@ $<) do_compile_python_plugin_obj = \ ($(print_plugin_obj_compile) \ $(CC) -c $(CPPFLAGS) $(CFLAGS) $(PYTHON_DIR_SQ) $(PYTHON_INCLUDES) -fPIC -o $@ $<) do_python_plugin_build = \ ($(print_plugin_build) \ $(CC) $< -shared $(LDFLAGS) $(PYTHON_LDFLAGS) -o $@) define make_version.h (echo '/* This file is automatically generated. Do not modify. */'; \ echo \#define VERSION_CODE $(shell \ expr $(VERSION) \* 256 + $(PATCHLEVEL)); \ echo '#define EXTRAVERSION ' $(EXTRAVERSION); \ echo '#define VERSION_STRING "'$(VERSION).$(PATCHLEVEL).$(EXTRAVERSION)'"'; \ echo '#define FILE_VERSION '$(FILE_VERSION); \ if [ -d $(src)/.git ]; then \ d=`git diff`; \ x=""; \ if [ ! -z "$$d" ]; then x="+"; fi; \ echo '#define VERSION_GIT "'$(shell \ git log -1 --pretty=format:"%H" 2>/dev/null)$$x'"'; \ else \ echo '#define VERSION_GIT "not-a-git-repo"'; \ fi \ ) > $1 endef define update_version.h ($(call make_version.h, $@.tmp); \ if [ -r $@ ] && cmp -s $@ $@.tmp; then \ rm -f $@.tmp; \ else \ $(print_update) \ mv -f $@.tmp $@; \ fi); endef define update_dir (echo $1 > $@.tmp; \ if [ -r $@ ] && cmp -s $@ $@.tmp; then \ rm -f $@.tmp; \ else \ $(print_update) \ mv -f $@.tmp $@; \ fi); endef define build_prefix (echo $1 > $@.tmp; \ if [ -r $@ ] && cmp -s $@ $@.tmp; then \ rm -f $@.tmp; \ else \ $(print_update) \ mv -f $@.tmp $@; \ fi); endef define do_install $(print_install) \ if [ ! 
-d '$(DESTDIR_SQ)$2' ]; then \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \ fi; \ $(INSTALL) $(if $3,-m $3,) $1 '$(DESTDIR_SQ)$2' endef define do_install_data $(print_install) \ if [ ! -d '$(DESTDIR_SQ)$2' ]; then \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \ fi; \ $(INSTALL) -m 644 $1 '$(DESTDIR_SQ)$2' endef define do_install_pkgconfig_file if [ -n "${pkgconfig_dir}" ]; then \ $(call do_install,$(PKG_CONFIG_FILE),$(pkgconfig_dir),644); \ else \ (echo Failed to locate pkg-config directory) 1>&2; \ fi endef define do_make_pkgconfig_file $(print_app_build) $(Q)cp -f $(srctree)/${PKG_CONFIG_SOURCE_FILE}.template ${PKG_CONFIG_FILE}; \ sed -i "s|INSTALL_PREFIX|${1}|g" ${PKG_CONFIG_FILE}; \ sed -i "s|LIB_VERSION|${LIBTRACECMD_VERSION}|g" ${PKG_CONFIG_FILE}; \ sed -i "s|LIB_DIR|$(libdir)|g" ${PKG_CONFIG_FILE}; \ sed -i "s|LIBTRACEFS_MIN_VERSION|$(LIBTRACEFS_MIN_VERSION)|g" ${PKG_CONFIG_FILE}; \ sed -i "s|HEADER_DIR|$(includedir)/trace-cmd|g" ${PKG_CONFIG_FILE}; endef do_asciidoc_build = \ ($(print_asciidoc) \ asciidoc -d manpage -b docbook -o $@ $<) do_xsltproc_build = \ ($(print_xsltproc) \ xsltproc --nonet -o $@ ${MANPAGE_DOCBOOK_XSL} $< $(hide_xsltproc_output)) # # asciidoc requires a synopsis, but file format man pages (5) do # not require them. This removes it from the file in the final step. define remove_synopsis (sed -e '/^\.SH "SYNOPSIS"/,/ignore/d' $1 > $1.tmp;\ mv $1.tmp $1) endef define do_install_docs $(print_install) \ if [ ! 
-d '$(DESTDIR_SQ)$2' ]; then \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \ fi; \ $(INSTALL) -m 644 $1 '$(DESTDIR_SQ)$2' endef ifneq ($(findstring $(MAKEFLAGS),s),s) ifneq ($(V),1) QUIET_ASCIIDOC = @echo ' ASCIIDOC '$@; QUIET_XMLTO = @echo ' XMLTO '$@; QUIET_SUBDIR0 = +@subdir= QUIET_SUBDIR1 = ;$(NO_SUBDIR) \ echo ' SUBDIR ' $$subdir; \ $(MAKE) $(PRINT_DIR) -C $$subdir export V endif endif trace-cmd-v3.3.1/tracecmd/000077500000000000000000000000001470231550600153305ustar00rootroot00000000000000trace-cmd-v3.3.1/tracecmd/Makefile000066400000000000000000000043271470231550600167760ustar00rootroot00000000000000# SPDX-License-Identifier: GPL-2.0 VERSION := $(TC_VERSION) PATCHLEVEL := $(TC_PATCHLEVEL) EXTRAVERSION := $(TC_EXTRAVERSION) bdir:=$(obj)/tracecmd TC_VERSION := $(bdir)/include/tc_version.h TARGETS = $(bdir)/trace-cmd $(TC_VERSION) BUILDGUI := 0 include $(src)/scripts/utils.mk CFLAGS += -I$(bdir)/include TRACE_CMD_OBJS = TRACE_CMD_OBJS += trace-cmd.o TRACE_CMD_OBJS += trace-record.o TRACE_CMD_OBJS += trace-read.o TRACE_CMD_OBJS += trace-split.o TRACE_CMD_OBJS += trace-listen.o TRACE_CMD_OBJS += trace-stack.o TRACE_CMD_OBJS += trace-hist.o TRACE_CMD_OBJS += trace-mem.o TRACE_CMD_OBJS += trace-snapshot.o TRACE_CMD_OBJS += trace-stat.o TRACE_CMD_OBJS += trace-profile.o TRACE_CMD_OBJS += trace-stream.o TRACE_CMD_OBJS += trace-record.o TRACE_CMD_OBJS += trace-restore.o TRACE_CMD_OBJS += trace-check-events.o TRACE_CMD_OBJS += trace-show.o TRACE_CMD_OBJS += trace-list.o TRACE_CMD_OBJS += trace-usage.o TRACE_CMD_OBJS += trace-dump.o TRACE_CMD_OBJS += trace-clear.o TRACE_CMD_OBJS += trace-vm.o TRACE_CMD_OBJS += trace-convert.o TRACE_CMD_OBJS += trace-attach.o TRACE_CMD_OBJS += trace-agent.o TRACE_CMD_OBJS += trace-tsync.o TRACE_CMD_OBJS += trace-setup-guest.o TRACE_CMD_OBJS += trace-sqlhist.o ifeq ($(VSOCK_DEFINED), 1) TRACE_CMD_OBJS += trace-vsock.o endif ALL_OBJS := $(TRACE_CMD_OBJS:%.o=$(bdir)/%.o) all_objs := $(sort $(ALL_OBJS)) all_deps := 
$(all_objs:$(bdir)/%.o=$(bdir)/.%.d) LPTHREAD ?= -lpthread LRT ?= -lrt CONFIG_INCLUDES = CONFIG_LIBS = $(LRT) $(LPTHREAD) $(TRACE_LIBS) $(ZLIB_LDLAGS) $(LIBZSTD_LDLAGS) CONFIG_FLAGS = all: $(TARGETS) $(bdir): @mkdir -p $(bdir) $(bdir)/include: | $(bdir) @mkdir -p $(bdir)/include $(TC_VERSION): force | $(bdir)/include $(Q)$(call update_version.h) $(all_deps): | $(bdir) $(all_objs): | $(bdir) $(bdir)/trace-cmd: $(ALL_OBJS) $(Q)$(do_app_build) $(bdir)/trace-cmd: $(LIBTRACECMD_STATIC) $(bdir)/%.o: %.c $(Q)$(call do_compile) $(all_deps): $(bdir)/.%.d: %.c $(Q)$(CC) -M -MT $(bdir)/$*.o $(CPPFLAGS) $(CFLAGS) $< > $@ $(all_deps): $(TC_VERSION) $(all_objs): $(bdir)/%.o : $(bdir)/.%.d dep_includes := $(wildcard $(DEPS)) ifneq ($(dep_includes),) include $(dep_includes) endif clean: $(RM) $(bdir)/*.a $(bdir)/*.so $(bdir)/*.o $(bdir)/.*.d $(TARGETS) force: .PHONY: clean trace-cmd-v3.3.1/tracecmd/include/000077500000000000000000000000001470231550600167535ustar00rootroot00000000000000trace-cmd-v3.3.1/tracecmd/include/bug.h000066400000000000000000000005211470231550600176770ustar00rootroot00000000000000/* SPDX-License-Identifier: LGPL-2.1 */ #ifndef __TRACE_CMD_BUG #define __TRACE_CMD_BUG #define unlikely(cond) __builtin_expect(!!(cond), 0) #define WARN_ONCE(cond, fmt, ...) 
\ ({ \ int __c__ = cond; \ if (unlikely(__c__)) { \ warning(fmt, ##__VA_ARGS__); \ } \ __c__; \ }) #endif /* __TRACE_CMD_BUG */ trace-cmd-v3.3.1/tracecmd/include/list.h000066400000000000000000000031051470231550600200760ustar00rootroot00000000000000/* SPDX-License-Identifier: LGPL-2.1 */ /* * Copyright (C) 2009 Red Hat Inc, Steven Rostedt * */ #ifndef __LIST_H #define __LIST_H #define offset_of(type, field) __builtin_offsetof(type, field) #define container_of(p, type, field) (type *)((long)p - offset_of(type, field)) struct list_head { struct list_head *next; struct list_head *prev; }; static inline void list_head_init(struct list_head *list) { list->next = list; list->prev = list; } static inline void list_add(struct list_head *p, struct list_head *head) { struct list_head *next = head->next; p->prev = head; p->next = next; next->prev = p; head->next = p; } static inline void list_add_tail(struct list_head *p, struct list_head *head) { struct list_head *prev = head->prev; p->prev = prev; p->next = head; prev->next = p; head->prev = p; } static inline void list_del(struct list_head *p) { struct list_head *next = p->next; struct list_head *prev = p->prev; next->prev = prev; prev->next = next; } static inline int list_empty(struct list_head *list) { return list->next == list; } #define list_for_each_entry(p, list, field) \ for (p = container_of((list)->next, typeof(*p), field); \ &(p)->field != list; \ p = container_of((p)->field.next, typeof(*p), field)) #define list_for_each_entry_safe(p, n, list, field) \ for (p = container_of((list)->next, typeof(*p), field), \ n = container_of((p)->field.next, typeof(*p), field); \ &(p)->field != list; \ p = n, n = container_of((p)->field.next, typeof(*p), field)) #endif /* __LIST_H */ trace-cmd-v3.3.1/tracecmd/include/trace-local.h000066400000000000000000000273371470231550600213260ustar00rootroot00000000000000/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt * */ #ifndef 
__TRACE_LOCAL_H #define __TRACE_LOCAL_H #include #include /* for DIR */ #include /* for isdigit() */ #include #include #include #include #include "trace-cmd-private.h" #include "event-utils.h" #define TRACE_AGENT_DEFAULT_PORT 823 #define DEFAULT_INPUT_FILE "trace.dat" #define GUEST_PIPE_NAME "trace-pipe-cpu" #define GUEST_DIR_FMT "/var/lib/trace-cmd/virt/%s" #define GUEST_FIFO_FMT GUEST_DIR_FMT "/" GUEST_PIPE_NAME "%d" #define VIRTIO_FIFO_FMT "/dev/virtio-ports/" GUEST_PIPE_NAME "%d" /* fix stupid glib guint64 typecasts and printf formats */ typedef unsigned long long u64; struct buffer_instance; #define __printf(a, b) __attribute__((format(printf,a,b))) __printf(1,2) void warning(const char *fmt, ...); /* for local shared information with trace-cmd executable */ void usage(char **argv); extern int silence_warnings; extern int show_status; void trace_set_loglevel(int level); int trace_set_verbose(char *level); enum port_type { USE_UDP = 0, /* Default setting */ USE_TCP, USE_VSOCK }; struct pid_record_data { int pid; int brass[2]; int cpu; int closed; struct tracecmd_input *stream; struct buffer_instance *instance; struct tep_record *record; }; void show_file(const char *name); struct tracecmd_input *read_trace_header(const char *file, int flags); int read_trace_files(void); void trace_record(int argc, char **argv); void trace_stop(int argc, char **argv); void trace_restart(int argc, char **argv); void trace_reset(int argc, char **argv); void trace_start(int argc, char **argv); void trace_set(int argc, char **argv); void trace_extract(int argc, char **argv); void trace_stream(int argc, char **argv); void trace_profile(int argc, char **argv); void trace_report(int argc, char **argv); void trace_split(int argc, char **argv); void trace_listen(int argc, char **argv); void trace_agent(int argc, char **argv); void trace_setup_guest(int argc, char **argv); void trace_restore(int argc, char **argv); void trace_clear(int argc, char **argv); void trace_check_events(int argc, 
char **argv); void trace_stack(int argc, char **argv); void trace_option(int argc, char **argv); void trace_hist(int argc, char **argv); void trace_snapshot(int argc, char **argv); void trace_mem(int argc, char **argv); void trace_stat(int argc, char **argv); void trace_show(int argc, char **argv); void trace_list(int argc, char **argv); void trace_usage(int argc, char **argv); void trace_dump(int argc, char **argv); void trace_attach(int argc, char **argv); void trace_convert(int argc, char **argv); void trace_sqlhist (int argc, char **argv); int trace_record_agent(struct tracecmd_msg_handle *msg_handle, int cpus, int *fds, int argc, char **argv, bool use_fifos, struct tracecmd_time_sync *tsync, unsigned long long trace_id, int rcid, const char *host); struct hook_list; void trace_init_profile(struct tracecmd_input *handle, struct hook_list *hooks, int global); int do_trace_profile(void); void trace_profile_set_merge_like_comms(void); struct tracecmd_input * trace_stream_init(struct buffer_instance *instance, int cpu, int fd, int cpus, struct hook_list *hooks, tracecmd_handle_init_func handle_init, int global); int trace_stream_read(struct pid_record_data *pids, int nr_pids, long sleep_us); void trace_show_data(struct tracecmd_input *handle, struct tep_record *record); /* --- event interation --- */ /* * Use this to iterate through the event directories */ enum event_process { PROCESSED_NONE, PROCESSED_EVENT, PROCESSED_SYSTEM }; enum process_type { PROCESS_EVENT, PROCESS_SYSTEM }; struct event_iter { DIR *system_dir; DIR *event_dir; struct dirent *system_dent; struct dirent *event_dent; }; enum event_iter_type { EVENT_ITER_NONE, EVENT_ITER_SYSTEM, EVENT_ITER_EVENT }; struct event_iter *trace_event_iter_alloc(const char *path); enum event_iter_type trace_event_iter_next(struct event_iter *iter, const char *path, const char *system); void trace_event_iter_free(struct event_iter *iter); char *append_file(const char *dir, const char *name); char 
*get_file_content(const char *file); char *strstrip(char *str); /* --- instance manipulation --- */ enum buffer_instance_flags { BUFFER_FL_KEEP = 1 << 0, BUFFER_FL_PROFILE = 1 << 1, BUFFER_FL_GUEST = 1 << 2, BUFFER_FL_AGENT = 1 << 3, BUFFER_FL_HAS_CLOCK = 1 << 4, BUFFER_FL_TSC2NSEC = 1 << 5, BUFFER_FL_NETWORK = 1 << 6, BUFFER_FL_PROXY = 1 << 7, }; struct func_list { struct func_list *next; const char *func; const char *mod; }; struct pid_addr_maps { struct pid_addr_maps *next; struct tracecmd_proc_addr_map *lib_maps; unsigned int nr_lib_maps; char *proc_name; int pid; }; struct opt_list { struct opt_list *next; const char *option; }; struct filter_pids { struct filter_pids *next; int pid; int exclude; }; struct tsc_nsec { int mult; int shift; unsigned long long offset; }; struct buffer_instance { struct buffer_instance *next; char *name; struct tracefs_instance *tracefs; unsigned long long trace_id; char *cpumask; char *output_file; const char *temp_dir; char *temp_file; struct event_list *events; struct event_list **event_next; bool delete; struct event_list *sched_switch_event; struct event_list *sched_wakeup_event; struct event_list *sched_wakeup_new_event; const char *plugin; char *filter_mod; struct func_list *filter_funcs; struct func_list *notrace_funcs; struct opt_list *options; struct filter_pids *filter_pids; struct filter_pids *process_pids; char *common_pid_filter; int nr_filter_pids; int len_filter_pids; int nr_process_pids; bool ptrace_child; int have_set_event_pid; int have_event_fork; int have_func_fork; int get_procmap; const char *clock; unsigned int *client_ports; struct trace_seq *s_save; struct trace_seq *s_print; struct tracecmd_input *handle; struct tracecmd_msg_handle *msg_handle; struct tracecmd_output *network_handle; const char *host; struct pid_addr_maps *pid_maps; char *max_graph_depth; int flags; int tracing_on_init_val; int tracing_on_fd; int buffer_size; int old_buffer_size; int subbuf_size; int old_subbuf_size; int cpu_count; int 
proxy_fd; int argc; char **argv; struct addrinfo *result; unsigned int cid; unsigned int port; int *fds; bool use_fifos; enum port_type port_type; /* Default to USE_UDP (zero) */ int tsync_loop_interval; struct tracecmd_time_sync *tsync; }; void init_top_instance(void); extern struct buffer_instance top_instance; extern struct buffer_instance *buffer_instances; extern struct buffer_instance *first_instance; #define for_each_instance(i) for (i = buffer_instances; i; i = (i)->next) #define for_all_instances(i) for (i = first_instance; i; \ i = i == &top_instance ? buffer_instances : (i)->next) #define is_agent(instance) ((instance)->flags & BUFFER_FL_AGENT) #define is_guest(instance) ((instance)->flags & BUFFER_FL_GUEST) #define is_proxy(instance) ((instance)->flags & BUFFER_FL_PROXY) #define is_network(instance) ((instance)->flags & BUFFER_FL_NETWORK) #define is_proxy_server(instance) \ ((instance)->msg_handle && \ (instance)->msg_handle->flags & TRACECMD_MSG_FL_PROXY) #define START_PORT_SEARCH 1500 #define MAX_PORT_SEARCH 6000 struct sockaddr_storage; int trace_net_make(int port, enum port_type type); int trace_net_search(int start_port, int *sfd, enum port_type type); int trace_net_print_connection(int fd); bool trace_net_cmp_connection(struct sockaddr_storage *addr, const char *name); bool trace_net_cmp_connection_fd(int fd, const char *name); struct buffer_instance *allocate_instance(const char *name); void add_instance(struct buffer_instance *instance, int cpu_count); void update_first_instance(struct buffer_instance *instance, int topt); void show_instance_file(struct buffer_instance *instance, const char *name); void show_options(const char *prefix, struct buffer_instance *buffer); struct trace_guest { struct tracefs_instance *instance; char *name; unsigned long long trace_id; int cid; int pid; int cpu_max; int *cpu_pid; int *task_pids; }; struct trace_guest *trace_get_guest(unsigned int cid, const char *name); bool trace_have_guests_pid(void); void 
read_qemu_guests(void); int get_guest_pid(unsigned int guest_cid); int get_guest_vcpu_pid(unsigned int guest_cid, unsigned int guest_vcpu); void trace_add_guest_info(struct tracecmd_output *handle, struct buffer_instance *instance); struct tracecmd_time_sync * trace_tsync_as_host(int fd, unsigned long long trace_id, int loop_interval, int guest_id, int guest_cpus, const char *proto_name, const char *clock); struct tracecmd_time_sync * trace_tsync_as_guest(int fd, const char *tsync_proto, const char *clock, unsigned int remote_id, unsigned int local_id); char *strparse(char *str, char delim, char **save); /* moved from trace-cmd.h */ void tracecmd_remove_instances(void); int tracecmd_add_event(const char *event_str, int stack); void tracecmd_enable_events(void); void tracecmd_disable_all_tracing(int disable_tracer); void tracecmd_disable_tracing(void); void tracecmd_enable_tracing(void); void tracecmd_stat_cpu(struct trace_seq *s, int cpu); int tracecmd_host_tsync(struct buffer_instance *instance, unsigned int tsync_port); void tracecmd_host_tsync_complete(struct buffer_instance *instance); const char *tracecmd_guest_tsync(struct tracecmd_tsync_protos *tsync_protos, char *clock, unsigned int *tsync_port, pthread_t *thr_id); int trace_make_vsock(unsigned int port); int trace_get_vsock_port(int sd, unsigned int *port); int trace_open_vsock(unsigned int cid, unsigned int port); int get_local_cid(unsigned int *cid); char *trace_get_guest_file(const char *file, const char *guest); #ifdef VSOCK int trace_vsock_open(unsigned int cid, unsigned int port); int trace_vsock_make(unsigned int port); int trace_vsock_make_any(void); int get_vsocket_params(int fd, unsigned int *lcid, unsigned int *rcid); int trace_vsock_get_port(int sd, unsigned int *port); bool trace_vsock_can_splice_read(void); int trace_vsock_local_cid(void); int trace_vsock_print_connection(int fd); #else static inline int trace_vsock_open(unsigned int cid, unsigned int port) { return -ENOTSUP; } static inline 
int trace_vsock_make(unsigned int port) { return -ENOTSUP; } static inline int trace_vsock_make_any(void) { return -ENOTSUP; } static inline int get_vsocket_params(int fd, unsigned int *lcid, unsigned int *rcid) { return -ENOTSUP; } static inline int trace_vsock_get_port(int sd, unsigned int *port) { return -ENOTSUP; } static inline bool trace_vsock_can_splice_read(void) { return false; } static inline int trace_vsock_local_cid(void) { return -ENOTSUP; } static inline int trace_vsock_print_connection(int fd) { return -1; } #endif /* VSOCK */ /* No longer in event-utils.h */ __printf(1,2) void __noreturn die(const char *fmt, ...); /* Can be overriden */ void *malloc_or_die(unsigned int size); /* Can be overridden */ __printf(1,2) void __noreturn __die(const char *fmt, ...); void __noreturn _vdie(const char *fmt, va_list ap); static inline bool is_digits(const char *s) { for (; *s; s++) if (!isdigit(*s)) return false; return true; } bool trace_tsc2nsec_is_supported(void); void make_pid_name(char *buf, const char *pidfile_basename); void remove_pid_file(const char *pidfile_basename); void make_pid_file(const char *pidfile_basename); static inline void set_tcp_no_delay(int sockfd, int socktype) { int flag = 1; if (socktype == SOCK_STREAM) setsockopt(sockfd, IPPROTO_TCP, TCP_NODELAY, &flag, sizeof(flag)); } #endif /* __TRACE_LOCAL_H */ trace-cmd-v3.3.1/tracecmd/meson.build000066400000000000000000000024071470231550600174750ustar00rootroot00000000000000# SPDX-License-Identifier: GPL-2.0 # # Copyright (c) 2023 Daniel Wagner, SUSE LLC sources = [ 'trace-agent.c', 'trace-attach.c', 'trace-check-events.c', 'trace-clear.c', 'trace-cmd.c', 'trace-convert.c', 'trace-dump.c', 'trace-hist.c', 'trace-list.c', 'trace-listen.c', 'trace-mem.c', 'trace-profile.c', 'trace-read.c', 'trace-record.c', 'trace-restore.c', 'trace-setup-guest.c', 'trace-show.c', 'trace-snapshot.c', 'trace-split.c', 'trace-stack.c', 'trace-stat.c', 'trace-stream.c', 'trace-tsync.c', 'trace-usage.c', 
'trace-vm.c', 'trace-sqlhist.c', ] if vsock_defined sources += 'trace-vsock.c' endif trace_cmd_incdir = include_directories(['.', 'include']) executable( 'trace-cmd', sources, dependencies: [ libtraceevent_dep, libtracefs_dep, zlib_dep, libzstd_dep, audit_dep], include_directories: [ incdir, trace_cmd_incdir, libtracecmd_incdir, libtracecmd_private_incdir, libtracecmd_ext_incdir], link_with: [static_libtracecmd], install: true, install_dir: bindir) install_data( 'trace-cmd.bash', install_dir: datadir + '/bash-completion/completions') trace-cmd-v3.3.1/tracecmd/trace-agent.c000066400000000000000000000232071470231550600176720ustar00rootroot00000000000000// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2018 VMware Inc, Slavomir Kaslev * * based on prior implementation by Yoshihiro Yunomae * Copyright (C) 2013 Hitachi, Ltd. * Yoshihiro YUNOMAE */ #include #include #include #include #include #include #include #include #include #include #include #include "trace-local.h" #include "trace-msg.h" #define GUEST_NAME "::GUEST::" #define dprint(fmt, ...) 
tracecmd_debug(fmt, ##__VA_ARGS__) static void make_vsocks(int nr, int *fds, unsigned int *ports) { unsigned int port; int i, fd, ret; for (i = 0; i < nr; i++) { fd = trace_vsock_make_any(); if (fd < 0) die("Failed to open vsocket"); ret = trace_vsock_get_port(fd, &port); if (ret < 0) die("Failed to get vsocket address"); fds[i] = fd; ports[i] = port; } } static void make_net(int nr, int *fds, unsigned int *ports) { int port; int i, fd; int start_port = START_PORT_SEARCH; for (i = 0; i < nr; i++) { port = trace_net_search(start_port, &fd, USE_TCP); if (port < 0) die("Failed to open socket"); if (listen(fd, 5) < 0) die("Failed to listen on port %d\n", port); fds[i] = fd; ports[i] = port; dprint("CPU[%d]: fd:%d port:%d\n", i, fd, port); start_port = port + 1; } } static void make_sockets(int nr, int *fds, unsigned int *ports, const char * network) { if (network) return make_net(nr, fds, ports); else return make_vsocks(nr, fds, ports); } static int open_agent_fifos(int nr_cpus, int *fds) { char path[PATH_MAX]; int i, fd, ret; for (i = 0; i < nr_cpus; i++) { snprintf(path, sizeof(path), VIRTIO_FIFO_FMT, i); fd = open(path, O_WRONLY); if (fd < 0) { ret = -errno; goto cleanup; } fds[i] = fd; } return 0; cleanup: while (--i >= 0) close(fds[i]); return ret; } static char *get_clock(int argc, char **argv) { int i; if (!argc || !argv) return NULL; for (i = 0; i < argc - 1; i++) { if (!strcmp("-C", argv[i])) return argv[i+1]; } return NULL; } static void trace_print_connection(int fd, const char *network) { int ret; if (network) ret = trace_net_print_connection(fd); else ret = trace_vsock_print_connection(fd); if (ret < 0) tracecmd_debug("Could not print connection fd:%d\n", fd); } static int wait_for_connection(int fd) { int sd; if (fd < 0) return -1; while (true) { tracecmd_debug("Listening on fd:%d\n", fd); sd = accept(fd, NULL, NULL); tracecmd_debug("Accepted fd:%d\n", sd); if (sd < 0) { if (errno == EINTR) continue; return -1; } break; } close(fd); return sd; } static 
void agent_handle(int sd, int nr_cpus, int page_size, int cid, int rcid, const char *network) { struct tracecmd_tsync_protos *tsync_protos = NULL; struct tracecmd_time_sync *tsync = NULL; struct tracecmd_msg_handle *msg_handle; const char *tsync_proto = NULL; struct trace_guest *guest; unsigned long long peer_trace_id; unsigned long long trace_id; unsigned long flags = rcid >= 0 ? TRACECMD_MSG_FL_PROXY : 0; enum tracecmd_time_sync_role tsync_role = TRACECMD_TIME_SYNC_ROLE_GUEST; unsigned int remote_id; unsigned int local_id; unsigned int tsync_port = 0; unsigned int *ports; unsigned int client_cpus = 0; unsigned int guests = 0; char **argv = NULL; int argc = 0; bool use_fifos; int *fds; int ret; int fd; fds = calloc(nr_cpus, sizeof(*fds)); ports = calloc(nr_cpus, sizeof(*ports)); if (!fds || !ports) die("Failed to allocate memory"); msg_handle = tracecmd_msg_handle_alloc(sd, flags); if (!msg_handle) die("Failed to allocate message handle"); if (rcid >= 0) { tsync_role = TRACECMD_TIME_SYNC_ROLE_HOST; ret = tracecmd_msg_recv_trace_proxy(msg_handle, &argc, &argv, &use_fifos, &peer_trace_id, &tsync_protos, &client_cpus, &guests); /* Update the guests peer_trace_id */ guest = trace_get_guest(rcid, NULL); if (guest) guest->trace_id = peer_trace_id; } else { ret = tracecmd_msg_recv_trace_req(msg_handle, &argc, &argv, &use_fifos, &peer_trace_id, &tsync_protos); } if (ret < 0) die("Failed to receive trace request"); tsync_proto = tracecmd_tsync_get_proto(tsync_protos, get_clock(argc, argv), tsync_role); if (use_fifos && open_agent_fifos(nr_cpus, fds)) use_fifos = false; if (!use_fifos) make_sockets(nr_cpus, fds, ports, network); if (tsync_proto) { if (network) { /* For now just use something */ remote_id = 2; local_id = 1; tsync_port = trace_net_search(START_PORT_SEARCH, &fd, USE_TCP); if (listen(fd, 5) < 0) die("Failed to listen on %d\n", tsync_port); } else { if (get_vsocket_params(msg_handle->fd, &local_id, &remote_id)) { warning("Failed to get local and remote ids"); /* 
Just make something up */ remote_id = -1; local_id = -2; } fd = trace_vsock_make_any(); if (fd >= 0 && trace_vsock_get_port(fd, &tsync_port) < 0) { close(fd); fd = -1; } } } trace_id = tracecmd_generate_traceid(); ret = tracecmd_msg_send_trace_resp(msg_handle, nr_cpus, page_size, ports, use_fifos, trace_id, tsync_proto, tsync_port); if (ret < 0) die("Failed to send trace response"); if (tsync_proto) { fd = wait_for_connection(fd); if (rcid >= 0) { tsync = trace_tsync_as_host(fd, trace_id, 0, rcid, client_cpus, tsync_proto, get_clock(argc, argv)); } else { tsync = trace_tsync_as_guest(fd, tsync_proto, get_clock(argc, argv), remote_id, local_id); } if (!tsync) close(fd); } trace_record_agent(msg_handle, nr_cpus, fds, argc, argv, use_fifos, tsync, trace_id, rcid, network); if (tsync) { if (rcid < 0) tracecmd_tsync_with_host_stop(tsync); tracecmd_tsync_free(tsync); } if (tsync_protos) { free(tsync_protos->names); free(tsync_protos); } free(argv[0]); free(argv); free(ports); free(fds); tracecmd_msg_handle_close(msg_handle); exit(0); } static volatile pid_t handler_pid; static void handle_sigchld(int sig) { int wstatus; pid_t pid; for (;;) { pid = waitpid(-1, &wstatus, WNOHANG); if (pid <= 0) break; if (pid == handler_pid) handler_pid = 0; } } static pid_t do_fork() { /* in debug mode, we do not fork off children */ if (tracecmd_get_debug()) return 0; return fork(); } static void agent_serve(unsigned int port, bool do_daemon, int proxy_id, const char *network) { struct sockaddr_storage net_addr; struct sockaddr *addr = NULL; socklen_t *addr_len_p = NULL; socklen_t addr_len = sizeof(net_addr); int sd, cd, nr_cpus; unsigned int cid = -1, rcid = -1; pid_t pid; signal(SIGCHLD, handle_sigchld); if (network) { addr = (struct sockaddr *)&net_addr; addr_len_p = &addr_len; } nr_cpus = tracecmd_count_cpus(); page_size = getpagesize(); if (network) { sd = trace_net_make(port, USE_TCP); if (listen(sd, 5) < 0) die("Failed to listen on %d\n", port); } else sd = trace_vsock_make(port); 
if (sd < 0) die("Failed to open socket"); tracecmd_tsync_init(); if (!network) { cid = trace_vsock_local_cid(); if (cid >= 0) printf("listening on @%u:%u\n", cid, port); } if (do_daemon && daemon(1, 0)) die("daemon"); for (;;) { cd = accept(sd, addr, addr_len_p); if (cd < 0) { if (errno == EINTR) continue; die("accept"); } if (proxy_id >= 0) { /* Only works with vsockets */ if (get_vsocket_params(cd, NULL, &rcid) < 0) { dprint("Failed to find connected cid"); close(cd); continue; } if (rcid != proxy_id) { dprint("Cid %d does not match expected cid %d\n", rcid, proxy_id); close(cd); continue; } } if (tracecmd_get_debug()) trace_print_connection(cd, network); if (network && !trace_net_cmp_connection(&net_addr, network)) { dprint("Client does not match '%s'\n", network); close(cd); continue; } if (handler_pid) goto busy; pid = do_fork(); if (pid == 0) { close(sd); signal(SIGCHLD, SIG_DFL); agent_handle(cd, nr_cpus, page_size, cid, rcid, network); } if (pid > 0) handler_pid = pid; busy: close(cd); } } enum { OPT_verbose = 254, OPT_debug = 255, OPT_notimeout = 256, }; void trace_agent(int argc, char **argv) { struct trace_guest *guest; bool do_daemon = false; unsigned int port = TRACE_AGENT_DEFAULT_PORT; const char *network = NULL; int proxy_id = -1; if (argc < 2) usage(argv); if (strcmp(argv[1], "agent") != 0) usage(argv); for (;;) { int c, option_index = 0; static struct option long_options[] = { {"port", required_argument, NULL, 'p'}, {"help", no_argument, NULL, '?'}, {"debug", no_argument, NULL, OPT_debug}, {"notimeout", no_argument, NULL, OPT_notimeout}, {"verbose", optional_argument, NULL, OPT_verbose}, {NULL, 0, NULL, 0} }; c = getopt_long(argc-1, argv+1, "+hp:DN:P:", long_options, &option_index); if (c == -1) break; switch (c) { case 'h': usage(argv); break; case 'N': network = optarg; break; case 'p': port = atoi(optarg); if (proxy_id >= 0) die("-N cannot be used with -P"); break; case 'D': do_daemon = true; break; case 'P': proxy_id = atoi(optarg); guest = 
trace_get_guest(proxy_id, GUEST_NAME); if (!guest) die("Failed to allocate guest instance"); break; case OPT_debug: tracecmd_set_debug(true); break; case OPT_notimeout: tracecmd_set_notimeout(true); break; case OPT_verbose: if (trace_set_verbose(optarg) < 0) die("invalid verbose level %s", optarg); break; default: usage(argv); } } if (optind < argc-1) usage(argv); agent_serve(port, do_daemon, proxy_id, network); } trace-cmd-v3.3.1/tracecmd/trace-attach.c000066400000000000000000000252751470231550600200470ustar00rootroot00000000000000// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2023 Google Inc, Steven Rostedt * */ #include #include #include #include #include "tracefs.h" #include "trace-local.h" struct timeshift_sample { struct timeshift_sample *next; long long offset; long long scaling; long long timestamp; long long fract; }; struct vcpu_pid { struct vcpu_pid *next; int pid; int cpu; }; static unsigned int num_cpus; static void *vcpu_pids; static struct timeshift_sample *tshifts; static struct timeshift_sample **tshifts_next = &tshifts; static u64 set_value(const char *str, const char *type, u64 def) { if (str && str[0] != '\0' && str[0] != '-' && !isdigit(str[0])) die("Bad %s value", type); if (str && str[0]) return strtoull(str, NULL, 0); return def; } static void add_timeshift(char *shift) { struct timeshift_sample *tshift; char *timestamp_str; char *offset_str; char *scale_str; char *fract_str; char *saveptr; u64 timestamp; u64 offset; u64 scale; u64 fract; offset_str = strparse(shift, ',', &saveptr); scale_str = strparse(NULL, ',', &saveptr); fract_str = strparse(NULL, ',', &saveptr); timestamp_str = strparse(NULL, ',', &saveptr); if (!offset_str) die("Bad timeshift argument"); offset = set_value(offset_str, "offset", 0); scale = set_value(scale_str, "scaling", 1); fract = set_value(fract_str, "fraction", 0); timestamp = set_value(timestamp_str, "timestamp", 0); tshift = calloc(1, sizeof(*tshift)); if (!tshift) die("Could not allocate timeshift"); 
*tshifts_next = tshift; tshifts_next = &tshift->next; tshift->offset = offset; tshift->scaling = scale; tshift->fract = fract; tshift->timestamp = timestamp; } static void free_timeshifts(void) { struct timeshift_sample *tshift; while (tshifts) { tshift = tshifts; tshifts = tshift->next; free(tshift); } } static void add_vcpu_pid(const char *pid) { struct vcpu_pid *vpid; vpid = calloc(1, sizeof(*vpid)); vpid->pid = atoi(pid); vpid->cpu = -1; vpid->next = vcpu_pids; vcpu_pids = vpid; } static void free_vcpu_pids(void) { struct vcpu_pid *vpid; while (vcpu_pids) { vpid = vcpu_pids; vcpu_pids = vpid->next; free(vpid); } } static inline int test_vcpu_id(struct tep_format_field **vcpu_id_field, struct tep_event *event, struct tep_record *record) { unsigned long long val; struct vcpu_pid *vpid; bool done = true; int pid; int cnt = 0; if (!*vcpu_id_field) { *vcpu_id_field = tep_find_field(event, "vcpu_id"); if (!*vcpu_id_field) die("Could not find vcpu_id field"); } pid = tep_data_pid(event->tep, record); for (vpid = vcpu_pids; vpid; vpid = vpid->next) { if (vpid->cpu < 0) { done = false; } else { cnt++; continue; } if (vpid->pid == pid) break; } if (done || (num_cpus && cnt == num_cpus)) return -1; if (!vpid) return 0; if (tep_read_number_field(*vcpu_id_field, record->data, &val)) die("Could not read data vcpu_id field"); vpid->cpu = (int)val; return 0; } static int entry_callback(struct tracecmd_input *handle, struct tep_event *event, struct tep_record *record, int cpu, void *data) { static struct tep_format_field *vcpu_id_field; return test_vcpu_id(&vcpu_id_field, event, record); } static int exit_callback(struct tracecmd_input *handle, struct tep_event *event, struct tep_record *record, int cpu, void *data) { static struct tep_format_field *vcpu_id_field; return test_vcpu_id(&vcpu_id_field, event, record); } static int cmp_vcpus(const void *A, const void *B) { struct vcpu_pid * const *a = A; struct vcpu_pid * const *b = B; if ((*a)->cpu < (*b)->cpu) return -1; return 
(*a)->cpu > (*b)->cpu; } static void update_end(char **end, void *data, int size, const char *stop) { char *str = *end; if (str + size > stop) die("Error in calculating buffer size"); memcpy(str, data, size); *end = str + size; } static void add_guest_to_host(struct tracecmd_output *host_ohandle, struct tracecmd_input *guest_ihandle) { unsigned long long guest_id; struct vcpu_pid **vcpu_list; struct vcpu_pid *vpid; char *name = ""; /* TODO, add name for guest */ char *stop; char *buf; char *end; int cpus = 0; int cpu; int size; guest_id = tracecmd_get_traceid(guest_ihandle); for (vpid = vcpu_pids; vpid ; vpid = vpid->next) { if (vpid->cpu < 0) continue; cpus++; } vcpu_list = calloc(cpus, sizeof(*vcpu_list)); if (!vcpu_list) die("Could not allocate vCPU list"); cpus = 0; for (vpid = vcpu_pids; vpid ; vpid = vpid->next) { if (vpid->cpu < 0) continue; vcpu_list[cpus++] = vpid; } qsort(vcpu_list, cpus, sizeof(*vcpu_list), cmp_vcpus); size = strlen(name) + 1; size += sizeof(int) + sizeof(long long); size += cpus * (sizeof(int) * 2); buf = calloc(1, size); if (!buf) die("Failed allocation"); end = buf; stop = buf + size; /* TODO match endianess of existing file */ update_end(&end, name, strlen(name) + 1, stop); update_end(&end, &guest_id, sizeof(guest_id), stop); update_end(&end, &cpus, sizeof(cpus), stop); for (cpu = 0; cpu < cpus; cpu++) { int vcpu = vcpu_list[cpu]->cpu; int pid = vcpu_list[cpu]->pid; update_end(&end, &cpu, sizeof(vcpu), stop); update_end(&end, &pid, sizeof(pid), stop); } if (tracecmd_add_option(host_ohandle, TRACECMD_OPTION_GUEST, size, buf) == NULL) die("Failed to add GUEST option to host"); free(vcpu_list); free(buf); } static void add_timeshift_to_guest(struct tracecmd_output *guest_ohandle, struct tracecmd_input *host_ihandle) { struct timeshift_sample *tshift = tshifts; struct timeshift_sample *last_tshift = NULL; unsigned long long host_id; char *stop; char *end; char *buf; int proto; int size = 0; int cpus; int cpu; host_id = 
tracecmd_get_traceid(host_ihandle); cpus = num_cpus; proto = 0; /* For now we just have zero */ /* * option size is: * trace id: 8 bytes * protocol flags: 4 bytes * CPU count: 4 bytes * * For each CPU: * sample cnt: 4 bytes * list of times: 8 bytes * sample cnt * list of offsets: 8 bytes * sample cnt * list of scaling: 8 bytes * sample cnt * * For each CPU: * list of fract: 8 bytes * CPU count */ size = 8 + 4 + 4; /* Include fraction bits here */ size += 8 * cpus; /* We only have one sample per CPU (for now) */ size += (4 + 8 * 3) * cpus; buf = calloc(1, size); if (!buf) die("Failed to allocate timeshift buffer"); end = buf; stop = buf + size; update_end(&end, &host_id, sizeof(host_id), stop); update_end(&end, &proto, sizeof(proto), stop); update_end(&end, &cpus, sizeof(cpus), stop); for (cpu = 0; cpu < cpus; cpu++) { struct timeshift_sample *tsample = tshift; unsigned long long sample; int cnt = 1; if (!tsample) tsample = last_tshift; if (!tsample) die("No samples given"); last_tshift = tsample; update_end(&end, &cnt, sizeof(cnt), stop); sample = tsample->timestamp; update_end(&end, &sample, sizeof(sample), stop); sample = tsample->offset; update_end(&end, &sample, sizeof(sample), stop); sample = tsample->scaling; update_end(&end, &sample, sizeof(sample), stop); } tshift = tshifts; last_tshift = NULL; for (cpu = 0; cpu < cpus; cpu++) { struct timeshift_sample *tsample = tshift; unsigned long long sample; if (!tsample) tsample = last_tshift; last_tshift = tsample; sample = tsample->fract; update_end(&end, &sample, sizeof(sample), stop); } if (tracecmd_add_option(guest_ohandle, TRACECMD_OPTION_TIME_SHIFT, size, buf) == NULL) die("Failed to add TIME SHIFT option"); free(buf); } static void add_tsc2nsec_to_guest(struct tracecmd_output *guest_ohandle, struct tracecmd_input *host_ihandle) { unsigned long long offset; int mult; int shift; int ret; char buf[sizeof(int) * 2 + sizeof(long long)]; char *stop; char *end; int size = sizeof(buf); ret = 
tracecmd_get_tsc2nsec(host_ihandle, &mult, &shift, &offset);
	if (ret < 0)
		die("Host does not have tsc2nsec info");

	end = buf;
	stop = buf + size;
	/* Serialize in option order: mult, shift, offset */
	update_end(&end, &mult, sizeof(mult), stop);
	update_end(&end, &shift, sizeof(shift), stop);
	update_end(&end, &offset, sizeof(offset), stop);

	if (tracecmd_add_option(guest_ohandle, TRACECMD_OPTION_TSC2NSEC, size, buf) == NULL)
		die("Failed to add TSC2NSEC option");
}

/*
 * map_cpus - walk the host trace with kvm_entry/kvm_exit callbacks attached
 * @handle: input handle of the host trace file
 *
 * Registers entry_callback and exit_callback on the kvm:kvm_entry and
 * kvm:kvm_exit events and iterates over all events. Dies only when
 * neither event could be followed.
 */
static void map_cpus(struct tracecmd_input *handle)
{
	int entry_ret;
	int exit_ret;

	entry_ret = tracecmd_follow_event(handle, "kvm", "kvm_entry", entry_callback, NULL);
	exit_ret = tracecmd_follow_event(handle, "kvm", "kvm_exit", exit_callback, NULL);

	/* One of the two events is enough */
	if (entry_ret < 0 && exit_ret < 0)
		die("Host needs kvm_exit or kvm_entry events to attach");

	tracecmd_iterate_events(handle, NULL, 0, NULL, NULL);
}

/*
 * trace_attach - "attach" sub-command entry point
 * @argc: argument count as received by main()
 * @argv: argument vector as received by main()
 *
 * Options: -c <cpus> sets num_cpus, -s <arg> is handed to add_timeshift(),
 * -h prints usage. After the options come the host file, the guest file
 * and one or more arguments handed to add_vcpu_pid().
 */
void trace_attach(int argc, char **argv)
{
	struct tracecmd_input *guest_ihandle;
	struct tracecmd_input *host_ihandle;
	struct tracecmd_output *guest_ohandle;
	struct tracecmd_output *host_ohandle;
	unsigned long long guest_id;
	char *guest_file;
	char *host_file;
	int ret;
	int fd;

	for (;;) {
		int c;

		/* Skip argv[0] so that the sub-command name is getopt's argv[0] */
		c = getopt(argc-1, argv+1, "c:s:h");
		if (c == -1)
			break;
		switch (c) {
		case 'h':
			usage(argv);
			break;
		case 's':
			add_timeshift(optarg);
			break;
		case 'c':
			num_cpus = atoi(optarg);
			break;
		default:
			usage(argv);
		}
	}

	/* Account for "attach" */
	optind++;

	/* Need host file, guest file and at least one more argument */
	if ((argc - optind) < 3)
		usage(argv);

	host_file = argv[optind++];
	guest_file = argv[optind++];

	for (; optind < argc; optind++)
		add_vcpu_pid(argv[optind]);

	host_ihandle = tracecmd_open(host_file,TRACECMD_FL_LOAD_NO_PLUGINS );
	guest_ihandle = tracecmd_open(guest_file,TRACECMD_FL_LOAD_NO_PLUGINS );

	if (!host_ihandle)
		die("Could not read %s\n", host_file);

	if (!guest_ihandle)
		die("Could not read %s\n", guest_file);

	guest_id = tracecmd_get_traceid(guest_ihandle);
	if (!guest_id)
		die("Guest data file does not contain traceid");

	map_cpus(host_ihandle);

	/* A zero return means the host already carries a map for this guest id */
	ret = tracecmd_get_guest_cpumap(host_ihandle, guest_id, NULL, NULL, NULL);
	if (ret == 0) {
printf("Guest is already mapped in host (id=0x%llx) .. skipping ...\n",
		       guest_id);
	} else {
		/* Re-open the host file for writing and append the GUEST option */
		fd = open(host_file, O_RDWR);
		if (fd < 0)
			die("Could not write %s", host_file);

		host_ohandle = tracecmd_get_output_handle_fd(fd);
		if (!host_ohandle)
			die("Error setting up %s for write", host_file);

		add_guest_to_host(host_ohandle, guest_ihandle);
		tracecmd_output_close(host_ohandle);
	}

	/* The guest file always gets the time shift and tsc2nsec options */
	fd = open(guest_file, O_RDWR);
	if (fd < 0)
		die("Could not write %s", guest_file);

	guest_ohandle = tracecmd_get_output_handle_fd(fd);
	if (!guest_ohandle)
		die("Error setting up %s for write", guest_file);

	add_timeshift_to_guest(guest_ohandle, host_ihandle);
	add_tsc2nsec_to_guest(guest_ohandle, host_ihandle);

	tracecmd_output_close(guest_ohandle);

	tracecmd_close(guest_ihandle);
	tracecmd_close(host_ihandle);

	free_timeshifts();
	free_vcpu_pids();

	return;
}
trace-cmd-v3.3.1/tracecmd/trace-check-events.c000066400000000000000000000027631470231550600211570ustar00rootroot00000000000000// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt
 *
 */
/* NOTE(review): the header names of the bare #include lines were lost in this copy */
#include
#include
#include

#include "tracefs.h"
#include "trace-local.h"

/* Values returned by getopt_long() for long-only options */
enum {
	OPT_verbose	= 255,
};

/*
 * trace_check_events - "check-events" sub-command entry point
 * @argc: argument count as received by main()
 * @argv: argument vector as received by main()
 *
 * Options: -N disables plugin loading, --verbose[=level] sets the log
 * level, -h (or anything unknown) prints usage.
 */
void trace_check_events(int argc, char **argv)
{
	const char *tracing;
	int ret, c;
	int parsing_failures = 0;
	struct tep_handle *pevent = NULL;
	struct tep_plugin_list *list = NULL;
	int open_flags = 0;
	int option_index = 0;
	static struct option long_options[] = {
		{"verbose", optional_argument, NULL, OPT_verbose},
		{NULL, 0, NULL, 0}
	};

	/* Skip argv[0] so that the sub-command name is getopt's argv[0] */
	while ((c = getopt_long(argc-1, argv+1, "+hN", long_options, &option_index)) >= 0) {
		switch (c) {
		case 'h':
		default:
			usage(argv);
			break;
		case 'N':
			open_flags |= TRACECMD_FL_LOAD_NO_PLUGINS;
			break;
		case OPT_verbose:
			if (trace_set_verbose(optarg) < 0)
				die("invalid verbose level %s", optarg);
			break;
		}
	}
	tracing = tracefs_tracing_dir();

	if (!tracing) {
		printf("Can not find or mount tracing directory!\n"
		       "Either tracing is not configured for this "
		       "kernel\n"
		       "or you do not have the proper permissions to "
"mount the directory"); exit(EINVAL); } pevent = tep_alloc(); if (!pevent) exit(EINVAL); list = trace_load_plugins(pevent, open_flags); ret = tracefs_fill_local_events(tracing, pevent, &parsing_failures); if (ret || parsing_failures) ret = EINVAL; tep_unload_plugins(list, pevent); tep_free(pevent); return; } trace-cmd-v3.3.1/tracecmd/trace-clear.c000066400000000000000000000046471470231550600176710ustar00rootroot00000000000000// SPDX-License-Identifier: LGPL-2.1 /* * Copyright (C) 2008, 2009, 2010 Red Hat Inc, Steven Rostedt * * Updates: * Copyright (C) 2020, VMware, Tzvetomir Stoyanov * */ #include #include #include #include "tracefs.h" #include "trace-local.h" struct instances_list { struct instances_list *next; struct tracefs_instance *instance; }; static int add_new_instance(struct instances_list **list, char *name) { struct instances_list *new; if (!tracefs_instance_exists(name)) return -1; new = calloc(1, sizeof(*new)); if (!new) return -1; new->instance = tracefs_instance_create(name); if (!new->instance) { free(new); return -1; } new->next = *list; *list = new; return 0; } static int add_instance_walk(const char *name, void *data) { return add_new_instance((struct instances_list **)data, (char *)name); } static void clear_list(struct instances_list *list) { struct instances_list *del; while (list) { del = list; list = list->next; tracefs_instance_free(del->instance); free(del); } } static void clear_instance_trace(struct tracefs_instance *instance) { FILE *fp; char *path; /* reset the trace */ path = tracefs_instance_get_file(instance, "trace"); fp = fopen(path, "w"); if (!fp) die("writing to '%s'", path); tracefs_put_tracing_file(path); fwrite("0", 1, 1, fp); fclose(fp); } static void clear_trace(struct instances_list *instances) { if (instances) { while (instances) { clear_instance_trace(instances->instance); instances = instances->next; } } else clear_instance_trace(NULL); } void trace_clear(int argc, char **argv) { struct instances_list *instances = 
NULL;
	bool all = false;
	int c;

	for (;;) {
		int option_index = 0;
		static struct option long_options[] = {
			{"all", no_argument, NULL, 'a'},
			{"help", no_argument, NULL, '?'},
			{NULL, 0, NULL, 0}
		};

		/* Skip argv[0] so that the sub-command name is getopt's argv[0] */
		c = getopt_long (argc-1, argv+1, "+haB:",
			long_options, &option_index);
		if (c == -1)
			break;
		switch (c) {
		case 'B':
			/* Clear only the named instance */
			if (add_new_instance(&instances, optarg))
				die("Failed to allocate instance %s", optarg);
			break;
		case 'a':
			/* Clear every existing instance and the NULL instance */
			all = true;
			if (tracefs_instances_walk(add_instance_walk, &instances))
				die("Failed to add all instances");
			break;
		case 'h':
		case '?':
		default:
			usage(argv);
			break;
		}
	}

	clear_trace(instances);
	/* With -a the NULL instance is cleared in addition to the listed ones */
	if (all)
		clear_trace(NULL);
	clear_list(instances);
	exit(0);
}
trace-cmd-v3.3.1/tracecmd/trace-cmd.bash000066400000000000000000000156311470231550600200340ustar00rootroot00000000000000
# Complete $1 against the output of "trace-cmd list -B".
show_instances()
{
    local cur="$1"
    local bufs=$(trace-cmd list -B)
    # This message is printed instead of a list when there are no instances
    if [ "$bufs" == "No buffer instances defined" ]; then
        return 0
    fi
    COMPREPLY=( $(compgen -W "${bufs}" -- "${cur}") )
    return 0
}

# Complete $1 against the second column of "virsh list".
# Returns 1 when virsh is not available.
show_virt()
{
    local cur="$1"
    if !
which virsh &>/dev/null; then
        return 1
    fi
    local virt=`virsh list | awk '/^ *[0-9]/ { print $2 }'`
    COMPREPLY=( $(compgen -W "${virt}" -- "${cur}") )
    return 0
}

# Complete $1 against the "trace-cmd list -o" options, each offered both
# in its plain form and with a "no" prefix.
show_options()
{
    local cur="$1"
    local options=$(trace-cmd list -o | sed -e 's/^\(no\)*\(.*\)/\2 no\2/')
    COMPREPLY=( $(compgen -W "${options}" -- "${cur}") )
    return 0
}

# File name completion on the caller's $cur (read through dynamic scoping).
__show_files()
{
    # Only used inside this function; keep it out of the user's shell
    local DIRS

    COMPREPLY=( $(compgen -f -- "$cur") )
    if [ ${#COMPREPLY[@]} -gt 1 ]; then
        return 0;
    fi
    # directories get '/' instead of space
    DIRS=( $(compgen -d -- "$cur"))
    if [ ${#DIRS[@]} -eq 1 ]; then
        compopt -o nospace
        COMPREPLY="$DIRS/"
        return 0;
    fi
    return 0
}

# Complete $2 against the options printed by "trace-cmd $1 -h"; fall back
# to file names when no option matches.
cmd_options()
{
    local type="$1"
    local cur="$2"
    local cmds=$(trace-cmd $type -h 2>/dev/null|grep "^ *-" | \
                                 sed -e 's/ *\(-[^ ]*\).*/\1/')
    COMPREPLY=( $(compgen -W "${cmds}" -- "${cur}") )
    if [ ${#COMPREPLY[@]} -eq 0 ]; then
        __show_files "${cur}"
    fi
}

# Complete $1 against the plugin options of "trace-cmd list -O".
plugin_options()
{
    local cur="$1"
    local opts=$(trace-cmd list -O | sed -ne 's/option://p')
    COMPREPLY=( $(compgen -W "${opts}" -- "${cur}") )
}

# Offer the algorithms of "trace-cmd list -c" plus "any" and "none".
compression_param()
{
    local opts=$(trace-cmd list -c | grep -v 'Supported' | cut -d "," -f1)
    opts+=" any none "
    COMPREPLY=( $(compgen -W "${opts}") )
}

# Completion for "trace-cmd list": $1 is the previous word, $2 the current
# word, the remaining arguments are the words of the command line.
__trace_cmd_list_complete()
{
    local prev=$1
    local cur=$2
    shift 2
    local words=("$@")
    # Only used inside this function; keep it out of the user's shell
    local size

    case "$prev" in
        list)
            # grep -E instead of egrep: egrep is obsolescent and recent
            # GNU grep prints a warning for it while completing
            local cmds=$(trace-cmd list -h |grep -E "^ {10}-" | \
                                 sed -e 's/.*\(-.\).*/\1/')
            COMPREPLY=( $(compgen -W "${cmds}" -- "${cur}") )
            ;;
        *)
            size=${#words[@]}
            if [ $size -gt 3 ]; then
                if [ "$cur" == "-" ]; then
                    let size=$size-3
                else
                    let size=$size-2
                fi
                local w="${words[$size]}"
                # Sub-options are offered only after "-e"
                if [ "$w" == "-e" ]; then
                    local cmds=$(trace-cmd list -h |grep -E "^ {12}-" | \
                                 sed -e 's/.*\(-.\).*/\1/')
                    COMPREPLY=( $(compgen -W "${cmds}" -- "${cur}") )
                fi
            fi
            ;;
    esac
}

# Completion for "trace-cmd show".
__trace_cmd_show_complete()
{
    local prev=$1
    local cur=$2
    shift 2
    local words=("$@")

    case "$prev" in
        -B)
            show_instances "$cur"
            ;;
        *)
            cmd_options show "$cur"
            ;;
    esac
}

# Completion for "trace-cmd extract".
__trace_cmd_extract_complete()
{
    local prev=$1
    local cur=$2
    shift 2
    local words=("$@")

    case "$prev" in
        extract)
            cmd_options "$prev" "$cur"
            ;;
        -B)
            show_instances "$cur"
            ;;
        *)
            __show_files
            ;;
    esac
}

# Completion for "trace-cmd record" (also used for stream, start, profile):
# $1 is the previous word, $2 the current word, the rest is the command line.
__trace_cmd_record_complete()
{
    local prev=$1
    local cur=$2
    shift 2
    local words=("$@")

    case "$prev" in
        -e)
            local list=$(trace-cmd list -e "$cur")
            # Everything before the first ':' of the current word
            local prefix=${cur%%:*}
            if [ -z "$cur" -o "$cur" != "$prefix" ]; then
                COMPREPLY=( $(compgen -W "all ${list}" -- "${cur}") )
            else
                # No ':' typed yet: offer bare event names and "system:" prefixes
                local events=$(for e in $list; do echo ${e/*:/}; done | sort -u)
                local systems=$(for s in $list; do echo ${s/:*/:}; done | sort -u)

                COMPREPLY=( $(compgen -W "all ${events} ${systems}" -- "${cur}") )
            fi

            # This is still to handle the "*:*" special case
            if [[ -n "$prefix" ]]; then
                local reply_n=${#COMPREPLY[*]}
                for (( i = 0; i < $reply_n; i++)); do
                    COMPREPLY[$i]=${COMPREPLY[i]##${prefix}:}
                done
            fi
            ;;
        -p)
            local plugins=$(trace-cmd list -p)
            COMPREPLY=( $(compgen -W "${plugins}" -- "${cur}" ) )
            ;;
        -l|-n|-g)
            # This is extremely slow still (may take >1sec).
            local funcs=$(trace-cmd list -f | sed 's/ .*//')
            COMPREPLY=( $(compgen -W "${funcs}" -- "${cur}") )
            ;;
        -B)
            show_instances "$cur"
            ;;
        -O)
            show_options "$cur"
            ;;
        -A)
            if !
show_virt "$cur"; then
                cmd_options record "$cur"
            fi
            ;;
        --compression)
            compression_param
            ;;
        *)
            # stream start and profile do not show all options
            cmd_options record "$cur"
            ;;
    esac
}

# Completion for "trace-cmd report".
__trace_cmd_report_complete()
{
    local prev=$1
    local cur=$2
    shift 2
    local words=("$@")

    case "$prev" in
        -O)
            plugin_options "$cur"
            ;;
        *)
            cmd_options report "$cur"
            ;;
    esac
}

# Completion for "trace-cmd dump".
__trace_cmd_dump_complete()
{
    local prev=$1
    local cur=$2
    shift 2
    local words=("$@")

    case "$prev" in
        -i)
            __show_files
            ;;
        *)
            cmd_options dump "$cur"
            ;;
    esac
}

# Completion for "trace-cmd convert".
__trace_cmd_convert_complete()
{
    local prev=$1
    local cur=$2
    shift 2
    local words=("$@")

    case "$prev" in
        -i)
            __show_files
            ;;
        -o)
            __show_files
            ;;
        --compression)
            compression_param
            ;;
        *)
            cmd_options convert "$cur"
            ;;
    esac
}

# Generic completion for sub-commands without a dedicated handler:
# $1 is the sub-command, $2 the previous word, $3 the current word.
__show_command_options()
{
    local command="$1"
    local prev="$2"
    local cur="$3"
    local cmds=( $(trace-cmd --help 2>/dev/null | \
                    grep " - " | sed 's/^ *//; s/ -.*//') )
    # Loop variables; keep them out of the user's shell
    local cmd opt

    for cmd in "${cmds[@]}"; do
        # Quoted so that an empty or odd word cannot break the test
        if [ "$cmd" == "$command" ]; then
            local opts=$(trace-cmd "$cmd" -h 2>/dev/null|grep "^ *-" | \
                                 sed -e 's/ *\(-[^ ]*\).*/\1/')
            if [ "$prev" == "-B" ]; then
                # opts is a plain string: word splitting is intended here
                for opt in ${opts[@]}; do
                    if [ "$opt" == "-B" ]; then
                        show_instances "$cur"
                        return 0
                    fi
                done
            fi
            COMPREPLY=( $(compgen -W "${opts}" -- "$cur"))
            break
        fi
    done
    if [ ${#COMPREPLY[@]} -eq 0 ]; then
        __show_files "${cur}"
    fi
}

# Top-level completion function registered for trace-cmd.
_trace_cmd_complete()
{
    local cur=""
    local prev=""
    local words=()

    # Not to use COMP_WORDS to avoid buggy behavior of Bash when
    # handling with words including ":", like:
    #
    # prev="${COMP_WORDS[COMP_CWORD-1]}"
    # cur="${COMP_WORDS[COMP_CWORD]}"
    #
    # Instead, we use _get_comp_words_by_ref() magic.
    _get_comp_words_by_ref -n : cur prev words

    # Directly after "trace-cmd": complete the sub-command name
    if [ "$prev" == "trace-cmd" ]; then
        local cmds=$(trace-cmd --help 2>/dev/null | \
                            grep " - " | sed 's/^ *//; s/ -.*//')
        COMPREPLY=( $(compgen -W "${cmds}" -- "${cur}") )
        return;
    fi

    # Otherwise dispatch on the sub-command (second word).
    # ${words[@]} stays unquoted on purpose: __trace_cmd_list_complete
    # indexes by word count, which changes if an empty word is kept.
    local w="${words[1]}"

    case "$w" in
        list)
            __trace_cmd_list_complete "${prev}" "${cur}" ${words[@]}
            return 0
            ;;
        show)
            __trace_cmd_show_complete "${prev}" "${cur}" ${words[@]}
            return 0
            ;;
        extract)
            __trace_cmd_extract_complete "${prev}" "${cur}" ${words[@]}
            return 0
            ;;
        record|stream|start|profile)
            __trace_cmd_record_complete "${prev}" "${cur}" ${words[@]}
            return 0
            ;;
        report)
            __trace_cmd_report_complete "${prev}" "${cur}" ${words[@]}
            return 0
            ;;
        dump)
            __trace_cmd_dump_complete "${prev}" "${cur}" ${words[@]}
            return 0
            ;;
        convert)
            __trace_cmd_convert_complete "${prev}" "${cur}" ${words[@]}
            return 0
            ;;
        *)
            __show_command_options "$w" "${prev}" "${cur}"
            ;;
    esac
}
complete -F _trace_cmd_complete trace-cmd
trace-cmd-v3.3.1/tracecmd/trace-cmd.c000066400000000000000000000072461470231550600173440ustar00rootroot00000000000000// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2008, 2009, 2010 Red Hat Inc, Steven Rostedt
 *
 */
/* NOTE(review): the header names of the bare #include lines were lost in this copy */
#include
#include
#include
#include
#include
#include
#include
#include

#include "trace-local.h"

int silence_warnings;
int show_status;

/* Fallback for C libraries that do not provide gettid() */
#ifndef gettid
#define gettid() syscall(__NR_gettid)
#endif

/*
 * warning - print a warning on stderr unless silence_warnings is set.
 * A pending errno is reported with perror() first and then cleared.
 */
void warning(const char *fmt, ...)
{ va_list ap; if (silence_warnings) return; if (errno) perror("trace-cmd"); errno = 0; va_start(ap, fmt); fprintf(stderr, " "); vfprintf(stderr, fmt, ap); va_end(ap); fprintf(stderr, "\n"); } void *malloc_or_die(unsigned int size) { void *data; data = malloc(size); if (!data) die("malloc"); return data; } /* Same as strtok_r(), but allows empty tokens */ char *strparse(char *str, char delim, char **save) { char *next; if (!str) { str = *save; if ((*save)[0] == '\0') return NULL; } next = strchr(str, delim); if (next) { *next = '\0'; *save = next + 1; } else { *save = str + strlen(str); } return str; } void tracecmd_debug(const char *fmt, ...) { va_list ap; if (!tracecmd_get_debug()) return; va_start(ap, fmt); printf("[%d] ", (int)gettid()); vprintf(fmt, ap); va_end(ap); } static struct trace_log_severity { int id; const char *name; } log_severity[] = { { .id = TEP_LOG_NONE, .name = "none" }, { .id = TEP_LOG_CRITICAL, .name = "crit" }, { .id = TEP_LOG_ERROR, .name = "err" }, { .id = TEP_LOG_WARNING, .name = "warn" }, { .id = TEP_LOG_INFO, .name = "info" }, { .id = TEP_LOG_DEBUG, .name = "debug" }, { .id = TEP_LOG_ALL, .name = "all" }, }; void trace_set_loglevel(int level) { tracecmd_set_loglevel(level); tracefs_set_loglevel(level); tep_set_loglevel(level); } int trace_set_verbose(char *level) { int id; /* Default level is info */ if (!level) level = "info"; if (isdigit(level[0])) { id = atoi(level); if (id >= TEP_LOG_NONE) { if (id > TEP_LOG_ALL) id = TEP_LOG_ALL; trace_set_loglevel(id); return 0; } } else { int size = ARRAY_SIZE(log_severity); int i; for (i = 0; i < size; i++) { if (!strncmp(level, log_severity[i].name, strlen(log_severity[i].name))) { trace_set_loglevel(log_severity[i].id); return 0; } } } return -1; } /** * struct command * @name command name * @run function to execute on command `name` */ struct command { char *name; void (*run)(int argc, char **argv); }; /** * Lookup table that maps command names to functions */ struct command commands[] = { 
{"report", trace_report},
	{"snapshot", trace_snapshot},
	{"hist", trace_hist},
	{"mem", trace_mem},
	{"listen", trace_listen},
	{"agent", trace_agent},
	{"setup-guest", trace_setup_guest},
	{"split", trace_split},
	{"restore", trace_restore},
	{"stack", trace_stack},
	{"check-events", trace_check_events},
	{"record", trace_record},
	{"start", trace_start},
	{"set", trace_set},
	{"extract", trace_extract},
	{"stop", trace_stop},
	{"stream", trace_stream},
	{"profile", trace_profile},
	{"restart", trace_restart},
	{"clear", trace_clear},
	{"reset", trace_reset},
	{"stat", trace_stat},
	{"options", trace_option},
	{"show", trace_show},
	{"list", trace_list},
	{"help", trace_usage},
	{"dump", trace_dump},
	{"attach", trace_attach},
	{"convert", trace_convert},
	{"sqlhist", trace_sqlhist},
	{"-h", trace_usage},
};

/*
 * Dispatch argv[1] to the matching entry of commands[]. Each handler gets
 * the unmodified argc/argv. Unknown commands show the usage text.
 */
int main (int argc, char **argv)
{
	int i;

	errno = 0;

	/* NOTE(review): relies on trace_usage() not returning — confirm */
	if (argc < 2)
		trace_usage(argc, argv);

	for (i = 0; i < ARRAY_SIZE(commands); ++i) {
		if (strcmp(argv[1], commands[i].name) == 0 ){
			commands[i].run(argc, argv);
			goto out;
		}
	}

	/* No valid command found, show help */
	trace_usage(argc, argv);
out:
	exit(0);
}
trace-cmd-v3.3.1/tracecmd/trace-convert.c000066400000000000000000000051701470231550600202530ustar00rootroot00000000000000// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2021, VMware, Tzvetomir Stoyanov
 */
/* NOTE(review): the header names of the bare #include lines were lost in this copy */
#include
#include
#include
#include
#include

#include "trace-local.h"
#include "trace-cmd.h"
#include "trace-cmd-private.h"

/*
 * convert_file - copy trace file @in to @out
 * @in:           input trace file
 * @out:          output trace file
 * @file_version: trace file version to write
 * @compr:        compression algorithm name, passed on to tracecmd_copy()
 *
 * Dies if either file cannot be opened or written.
 */
static void convert_file(const char *in, const char *out, int file_version, char *compr)
{
	struct tracecmd_input *ihandle;
	struct tracecmd_output *ohandle;

	ihandle = tracecmd_open_head(in, 0);
	if (!ihandle)
		die("error reading %s", in);

	ohandle = tracecmd_copy(ihandle, out, TRACECMD_FILE_CPU_FLYRECORD, file_version, compr);
	if (!ohandle)
		die("error writing %s", out);
	tracecmd_output_close(ohandle);
	tracecmd_close(ihandle);
}

/* Values returned by getopt_long() for long-only options */
enum {
	OPT_file_version	= 254,
	OPT_compression		= 255,
};

/*
 * trace_convert - "convert" sub-command entry point
 * @argc: argument count as received by main()
 * @argv: argument vector as received by main()
 */
void trace_convert(int argc, char **argv)
{
	char *input_file = NULL;
	char
*output_file = NULL;
	char *compression = NULL;
	int file_version = tracecmd_default_file_version();
	int c;

	if (argc < 2)
		usage(argv);

	if (strcmp(argv[1], "convert") != 0)
		usage(argv);
	for (;;) {
		int option_index = 0;
		static struct option long_options[] = {
			{"compression", required_argument, NULL, OPT_compression},
			{"file-version", required_argument, NULL, OPT_file_version},
			{"help", no_argument, NULL, '?'},
			{NULL, 0, NULL, 0}
		};

		/* Skip argv[0] so that the sub-command name is getopt's argv[0] */
		c = getopt_long (argc-1, argv+1, "+hi:o:", long_options, &option_index);
		if (c == -1)
			break;
		switch (c) {
		case 'i':
			if (input_file)
				die("Only one input file is supported, %s already set",
				    input_file);
			input_file = optarg;
			break;
		case 'o':
			if (output_file)
				die("Only one output file is supported, %s already set",
				    output_file);
			output_file = optarg;
			break;
		case OPT_compression:
			/* "any" and "none" are always accepted */
			if (strcmp(optarg, "any") && strcmp(optarg, "none") &&
			    !tracecmd_compress_is_supported(optarg, NULL))
				die("Compression algorithm %s is not supported", optarg);
			compression = optarg;
			break;
		case OPT_file_version:
			file_version = atoi(optarg);
			if (file_version < FILE_VERSION_MIN || file_version > FILE_VERSION_MAX)
				die("Unsupported file version %d, "
				    "supported versions are from %d to %d",
				    file_version, FILE_VERSION_MIN, FILE_VERSION_MAX);
			break;
		case 'h':
		case '?':
		default:
			usage(argv);
		}
	}

	/* A remaining non-option argument is the output file, unless -o was given */
	if ((argc - optind) >= 2) {
		if (output_file)
			usage(argv);
		output_file = argv[optind + 1];
	}

	if (!input_file)
		input_file = DEFAULT_INPUT_FILE;
	if (!output_file)
		usage(argv);
	/* File versions with compression support default to "any" */
	if (file_version >= FILE_VERSION_COMPRESSION && !compression)
		compression = "any";

	convert_file(input_file, output_file, file_version, compression);
}
trace-cmd-v3.3.1/tracecmd/trace-dump.c000066400000000000000000000775301470231550600175510ustar00rootroot00000000000000// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt
 *
 * Updates:
 * Copyright (C) 2019, VMware, Tzvetomir Stoyanov
 */
/* NOTE(review): the header names of the bare #include lines were lost in this copy */
#include
#include
#include
#include
#include
#include

#include "trace-local.h"

#define
TRACING_STR "tracing" #define HEAD_PAGE_STR "header_page" #define HEAD_PAGE_EVENT "header_event" #define HEAD_OPTIONS "options " #define HEAD_LATENCY "latency " #define HEAD_FLYRECORD "flyrecord" #define DUMP_SIZE 1024 static struct tep_handle *tep; static unsigned int trace_cpus; static int has_clock; static unsigned long file_version; static bool read_compress; static struct tracecmd_compression *compress; static char *meta_strings; static int meta_strings_size; enum dump_items { SUMMARY = (1 << 0), HEAD_PAGE = (1 << 1), HEAD_EVENT = (1 << 2), FTRACE_FORMAT = (1 << 3), EVENT_SYSTEMS = (1 << 4), EVENT_FORMAT = (1 << 5), KALLSYMS = (1 << 6), TRACE_PRINTK = (1 << 7), CMDLINES = (1 << 8), OPTIONS = (1 << 9), FLYRECORD = (1 << 10), CLOCK = (1 << 11), SECTIONS = (1 << 12), STRINGS = (1 << 13), }; struct file_section { int id; unsigned long long offset; struct file_section *next; enum dump_items verbosity; }; static struct file_section *sections; enum dump_items verbosity; #define DUMP_CHECK(X) ((X) & verbosity) #define do_print(ids, fmt, ...) 
\ do { \ if (!(ids) || DUMP_CHECK(ids)) \ tracecmd_plog(fmt, ##__VA_ARGS__); \ } while (0) static int read_fd(int fd, char *dst, int len) { size_t size = 0; int r; do { r = read(fd, dst+size, len); if (r > 0) { size += r; len -= r; } else break; } while (r > 0); if (len) return -1; return size; } static int read_compressed(int fd, char *dst, int len) { if (read_compress) return tracecmd_compress_buffer_read(compress, dst, len); return read_fd(fd, dst, len); } static int do_lseek(int fd, int offset, int whence) { if (read_compress) return tracecmd_compress_lseek(compress, offset, whence); return lseek(fd, offset, whence); } static int read_file_string(int fd, char *dst, int len) { size_t size = 0; int r; do { r = read_compressed(fd, dst+size, 1); if (r > 0) { size++; len--; } else break; if (!dst[size - 1]) break; } while (r > 0 && len); if (!size || dst[size - 1]) return -1; return 0; } static int read_file_bytes(int fd, char *dst, int len) { int ret; ret = read_compressed(fd, dst, len); return ret < 0 ? ret : 0; } static void read_dump_string(int fd, int size, enum dump_items id) { char buf[DUMP_SIZE]; int lsize; while (size) { lsize = (size < DUMP_SIZE) ? 
size : DUMP_SIZE - 1; if (read_file_bytes(fd, buf, lsize)) die("cannot read %d bytes", lsize); buf[lsize] = 0; do_print(id, "%s", buf); size -= lsize; } do_print(id, "\n"); } static int read_file_number(int fd, void *digit, int size) { unsigned long long val; char buf[8]; if (size > 8) return -1; if (read_file_bytes(fd, buf, size)) return -1; val = tep_read_number(tep, buf, size); switch (size) { case 1: *((char *)digit) = val; break; case 2: *((unsigned short *)digit) = val; break; case 4: *((unsigned int *)digit) = val; break; case 8: *((unsigned long long *)digit) = val; break; default: return -1; } return 0; } static const char *get_metadata_string(int offset) { if (!meta_strings || offset < 0 || meta_strings_size <= offset) return NULL; return meta_strings + offset; } static void dump_initial_format(int fd) { char magic[] = TRACECMD_MAGIC; char buf[DUMP_SIZE]; int val4; do_print(SUMMARY, "\t[Initial format]\n"); /* check initial bytes */ if (read_file_bytes(fd, buf, sizeof(magic))) die("cannot read %zu bytes magic", sizeof(magic)); if (memcmp(buf, magic, sizeof(magic)) != 0) die("wrong file magic"); /* check initial tracing string */ if (read_file_bytes(fd, buf, strlen(TRACING_STR))) die("cannot read %zu bytes tracing string", strlen(TRACING_STR)); buf[strlen(TRACING_STR)] = 0; if (strncmp(buf, TRACING_STR, strlen(TRACING_STR)) != 0) die("wrong tracing string: %s", buf); /* get file version */ if (read_file_string(fd, buf, DUMP_SIZE)) die("no version string"); do_print(SUMMARY, "\t\t%s\t[Version]\n", buf); file_version = strtol(buf, NULL, 10); if (!file_version && errno) die("Invalid file version string %s", buf); if (!tracecmd_is_version_supported(file_version)) die("Unsupported file version %lu", file_version); /* get file endianness*/ if (read_file_bytes(fd, buf, 1)) die("cannot read file endianness"); do_print(SUMMARY, "\t\t%d\t[%s endian]\n", buf[0], buf[0]?"Big":"Little"); tep_set_file_bigendian(tep, buf[0]); tep_set_local_bigendian(tep, 
tracecmd_host_bigendian()); /* get file bytes per long*/ if (read_file_bytes(fd, buf, 1)) die("cannot read file bytes per long"); do_print(SUMMARY, "\t\t%d\t[Bytes in a long]\n", buf[0]); if (read_file_number(fd, &val4, 4)) die("cannot read file page size"); do_print(SUMMARY, "\t\t%d\t[Page size, bytes]\n", val4); } static void dump_compress(int fd) { char zname[DUMP_SIZE]; char zver[DUMP_SIZE]; if (file_version < FILE_VERSION_COMPRESSION) return; /* get compression header */ if (read_file_string(fd, zname, DUMP_SIZE)) die("no compression header"); if (read_file_string(fd, zver, DUMP_SIZE)) die("no compression version"); do_print((SUMMARY), "\t\t%s\t[Compression algorithm]\n", zname); do_print((SUMMARY), "\t\t%s\t[Compression version]\n", zver); if (strcmp(zname, "none")) { compress = tracecmd_compress_alloc(zname, zver, fd, tep, NULL); if (!compress) die("cannot uncompress the file"); } } static void dump_header_page(int fd) { unsigned long long size; char buf[DUMP_SIZE]; do_print((SUMMARY | HEAD_PAGE), "\t[Header page, "); /* check header string */ if (read_file_bytes(fd, buf, strlen(HEAD_PAGE_STR) + 1)) die("cannot read %zu bytes header string", strlen(HEAD_PAGE_STR)); if (strncmp(buf, HEAD_PAGE_STR, strlen(HEAD_PAGE_STR)) != 0) die("wrong header string: %s", buf); if (read_file_number(fd, &size, 8)) die("cannot read the size of the page header information"); do_print((SUMMARY | HEAD_PAGE), "%lld bytes]\n", size); read_dump_string(fd, size, HEAD_PAGE); } static void dump_header_event(int fd) { unsigned long long size; char buf[DUMP_SIZE]; do_print((SUMMARY | HEAD_EVENT), "\t[Header event, "); /* check header string */ if (read_file_bytes(fd, buf, strlen(HEAD_PAGE_EVENT) + 1)) die("cannot read %zu bytes header string", strlen(HEAD_PAGE_EVENT)); if (strncmp(buf, HEAD_PAGE_EVENT, strlen(HEAD_PAGE_EVENT)) != 0) die("wrong header string: %s", buf); if (read_file_number(fd, &size, 8)) die("cannot read the size of the page header information"); do_print((SUMMARY | 
HEAD_EVENT), "%lld bytes]\n", size); read_dump_string(fd, size, HEAD_EVENT); } static void uncompress_reset(void) { if (compress && file_version >= FILE_VERSION_COMPRESSION) { read_compress = false; tracecmd_compress_reset(compress); } } static int uncompress_block(void) { int ret = 0; if (compress && file_version >= FILE_VERSION_COMPRESSION) { ret = tracecmd_uncompress_block(compress); if (!ret) read_compress = true; } return ret; } static void dump_ftrace_events_format(int fd) { unsigned long long size; unsigned int count; do_print((SUMMARY | FTRACE_FORMAT), "\t[Ftrace format, "); if (read_file_number(fd, &count, 4)) die("cannot read the count of the ftrace events"); do_print((SUMMARY | FTRACE_FORMAT), "%d events]\n", count); while (count) { if (read_file_number(fd, &size, 8)) die("cannot read the size of the %d ftrace event", count); read_dump_string(fd, size, FTRACE_FORMAT); count--; } } static void dump_events_format(int fd) { unsigned long long size; unsigned int systems; unsigned int events; char buf[DUMP_SIZE]; do_print((SUMMARY | EVENT_FORMAT | EVENT_SYSTEMS), "\t[Events format, "); if (read_file_number(fd, &systems, 4)) die("cannot read the count of the event systems"); do_print((SUMMARY | EVENT_FORMAT | EVENT_SYSTEMS), "%d systems]\n", systems); while (systems) { if (read_file_string(fd, buf, DUMP_SIZE)) die("cannot read the name of the %dth system", systems); if (read_file_number(fd, &events, 4)) die("cannot read the count of the events in system %s", buf); do_print(EVENT_SYSTEMS, "\t\t%s %d [system, events]\n", buf, events); while (events) { if (read_file_number(fd, &size, 8)) die("cannot read the format size of the %dth event from system %s", events, buf); read_dump_string(fd, size, EVENT_FORMAT); events--; } systems--; } } static void dump_kallsyms(int fd) { unsigned int size; do_print((SUMMARY | KALLSYMS), "\t[Kallsyms, "); if (read_file_number(fd, &size, 4)) die("cannot read the size of the kallsyms"); do_print((SUMMARY | KALLSYMS), "%d bytes]\n", 
size); read_dump_string(fd, size, KALLSYMS); } static void dump_printk(int fd) { unsigned int size; do_print((SUMMARY | TRACE_PRINTK), "\t[Trace printk, "); if (read_file_number(fd, &size, 4)) die("cannot read the size of the trace printk"); do_print((SUMMARY | TRACE_PRINTK), "%d bytes]\n", size); read_dump_string(fd, size, TRACE_PRINTK); } static void dump_cmdlines(int fd) { unsigned long long size; do_print((SUMMARY | CMDLINES), "\t[Saved command lines, "); if (read_file_number(fd, &size, 8)) die("cannot read the size of the saved command lines"); do_print((SUMMARY | CMDLINES), "%d bytes]\n", size); read_dump_string(fd, size, CMDLINES); } static void dump_cpus_count(int fd) { if (read_file_number(fd, &trace_cpus, 4)) die("cannot read the cpu count"); do_print(SUMMARY, "\t%d [CPUs with tracing data]\n", trace_cpus); } static void dump_option_string(int fd, int size, char *desc) { do_print(OPTIONS, "\t\t[Option %s, %d bytes]\n", desc, size); if (size) read_dump_string(fd, size, OPTIONS); } static void dump_section_header(int fd, enum dump_items v, unsigned short *flags) { unsigned long long offset, size; unsigned short fl; unsigned short id; const char *desc; int desc_id; offset = lseek(fd, 0, SEEK_CUR); if (read_file_number(fd, &id, 2)) die("cannot read the section id"); if (read_file_number(fd, &fl, 2)) die("cannot read the section flags"); if (read_file_number(fd, &desc_id, 4)) die("no section description"); desc = get_metadata_string(desc_id); if (!desc) desc = "Unknown"; if (read_file_number(fd, &size, 8)) die("cannot read section size"); do_print(v, "\t[Section %d @ %lld: \"%s\", flags 0x%X, %lld bytes]\n", id, offset, desc, fl, size); if (flags) *flags = fl; } static void dump_option_buffer(int fd, unsigned short option, int size) { unsigned long long total_size = 0; unsigned long long data_size; unsigned long long current; unsigned long long offset; unsigned short flags; char clock[DUMP_SIZE]; char name[DUMP_SIZE]; int page_size; int cpus = 0; int id; int 
i; if (size < 8) die("broken buffer option with size %d", size); if (read_file_number(fd, &offset, 8)) die("cannot read the offset of the buffer option"); if (read_file_string(fd, name, DUMP_SIZE)) die("cannot read the name of the buffer option"); if (file_version < FILE_VERSION_SECTIONS) { do_print(OPTIONS|FLYRECORD, "\t\t[Option BUFFER, %d bytes]\n", size); do_print(OPTIONS|FLYRECORD, "%lld [offset]\n", offset); do_print(OPTIONS|FLYRECORD, "\"%s\" [name]\n", name); return; } current = lseek(fd, 0, SEEK_CUR); if (lseek(fd, offset, SEEK_SET) == (off_t)-1) die("cannot goto buffer offset %lld", offset); dump_section_header(fd, FLYRECORD, &flags); if (lseek(fd, current, SEEK_SET) == (off_t)-1) die("cannot go back to buffer option"); do_print(OPTIONS|FLYRECORD, "\t\t[Option BUFFER, %d bytes]\n", size); do_print(OPTIONS|FLYRECORD, "%lld [offset]\n", offset); do_print(OPTIONS|FLYRECORD, "\"%s\" [name]\n", name); if (read_file_string(fd, clock, DUMP_SIZE)) die("cannot read clock of the buffer option"); do_print(OPTIONS|FLYRECORD, "\"%s\" [clock]\n", clock); if (option == TRACECMD_OPTION_BUFFER) { if (read_file_number(fd, &page_size, 4)) die("cannot read the page size of the buffer option"); do_print(OPTIONS|FLYRECORD, "%d [Page size, bytes]\n", page_size); if (read_file_number(fd, &cpus, 4)) die("cannot read the cpu count of the buffer option"); do_print(OPTIONS|FLYRECORD, "%d [CPUs]:\n", cpus); for (i = 0; i < cpus; i++) { if (read_file_number(fd, &id, 4)) die("cannot read the id of cpu %d from the buffer option", i); if (read_file_number(fd, &offset, 8)) die("cannot read the offset of cpu %d from the buffer option", i); if (read_file_number(fd, &data_size, 8)) die("cannot read the data size of cpu %d from the buffer option", i); total_size += data_size; do_print(OPTIONS|FLYRECORD, " %d %lld\t%lld\t[id, data offset and size]\n", id, offset, data_size); } do_print(SUMMARY, "\t\[buffer \"%s\", \"%s\" clock, %d page size, " "%d cpus, %lld bytes flyrecord data]\n", name, 
clock, page_size, cpus, total_size); } else { do_print(SUMMARY, "\t\[buffer \"%s\", \"%s\" clock, latency data]\n", name, clock); } } static void dump_option_int(int fd, int size, char *desc) { int val; do_print(OPTIONS, "\t\t[Option %s, %d bytes]\n", desc, size); read_file_number(fd, &val, size); do_print(OPTIONS, "%d\n", val); } static void dump_option_xlong(int fd, int size, char *desc) { long long val; do_print(OPTIONS, "\t\t[Option %s, %d bytes]\n", desc, size); read_file_number(fd, &val, size); do_print(OPTIONS, "0x%llX\n", val); } struct time_shift_cpu { unsigned int count; long long *scalings; long long *frac; long long *offsets; unsigned long long *times; }; static void dump_option_timeshift(int fd, int size) { struct time_shift_cpu *cpus_data; long long trace_id; unsigned int flags; unsigned int cpus; int i, j; /* * long long int (8 bytes) trace session ID * int (4 bytes) count of timestamp offsets. * long long array of size [count] of times, * when the offsets were calculated. * long long array of size [count] of timestamp offsets. 
*/ if (size < 12) { do_print(OPTIONS, "Broken time shift option, size %s", size); return; } do_print(OPTIONS, "\t\t[Option TimeShift, %d bytes]\n", size); read_file_number(fd, &trace_id, 8); size -= 8; do_print(OPTIONS, "0x%llX [peer's trace id]\n", trace_id); read_file_number(fd, &flags, 4); size -= 4; do_print(OPTIONS, "0x%llX [peer's protocol flags]\n", flags); read_file_number(fd, &cpus, 4); size -= 4; do_print(OPTIONS, "0x%llX [peer's CPU count]\n", cpus); cpus_data = calloc(cpus, sizeof(struct time_shift_cpu)); if (!cpus_data) return; for (j = 0; j < cpus; j++) { if (size < 4) goto out; read_file_number(fd, &cpus_data[j].count, 4); size -= 4; do_print(OPTIONS, "%lld [samples count for CPU %d]\n", cpus_data[j].count, j); cpus_data[j].times = calloc(cpus_data[j].count, sizeof(long long)); cpus_data[j].offsets = calloc(cpus_data[j].count, sizeof(long long)); cpus_data[j].scalings = calloc(cpus_data[j].count, sizeof(long long)); cpus_data[j].frac = calloc(cpus_data[j].count, sizeof(long long)); if (!cpus_data[j].times || !cpus_data[j].offsets || !cpus_data[j].scalings || !cpus_data[j].frac) goto out; for (i = 0; i < cpus_data[j].count; i++) { if (size < 8) goto out; read_file_number(fd, cpus_data[j].times + i, 8); size -= 8; } for (i = 0; i < cpus_data[j].count; i++) { if (size < 8) goto out; read_file_number(fd, cpus_data[j].offsets + i, 8); size -= 8; } for (i = 0; i < cpus_data[j].count; i++) { if (size < 8) goto out; read_file_number(fd, cpus_data[j].scalings + i, 8); size -= 8; } } if (size > 0) { for (j = 0; j < cpus; j++) { if (!cpus_data[j].frac) goto out; for (i = 0; i < cpus_data[j].count; i++) { if (size < 8) goto out; read_file_number(fd, cpus_data[j].frac + i, 8); size -= 8; } } } for (j = 0; j < cpus; j++) { for (i = 0; i < cpus_data[j].count; i++) do_print(OPTIONS, "\t%lld %lld %llu %llu[offset * scaling >> fraction @ time]\n", cpus_data[j].offsets[i], cpus_data[j].scalings[i], cpus_data[j].frac[i], cpus_data[j].times[i]); } out: if (j < cpus) 
do_print(OPTIONS, "Broken time shift option\n"); for (j = 0; j < cpus; j++) { free(cpus_data[j].times); free(cpus_data[j].offsets); free(cpus_data[j].scalings); free(cpus_data[j].frac); } free(cpus_data); } void dump_option_guest(int fd, int size) { unsigned long long trace_id; char *buf, *p; int cpu, pid; int cpus; int i; do_print(OPTIONS, "\t\t[Option GUEST, %d bytes]\n", size); /* * Guest name, null terminated string * long long (8 bytes) trace-id * int (4 bytes) number of guest CPUs * array of size number of guest CPUs: * int (4 bytes) Guest CPU id * int (4 bytes) Host PID, running the guest CPU */ buf = calloc(1, size); if (!buf) return; if (read_file_bytes(fd, buf, size)) goto out; p = buf; do_print(OPTIONS, "%s [Guest name]\n", p); size -= strlen(buf) + 1; p += strlen(buf) + 1; if (size < sizeof(long long)) goto out; trace_id = tep_read_number(tep, p, sizeof(long long)); size -= sizeof(long long); p += sizeof(long long); do_print(OPTIONS, "0x%llX [trace id]\n", trace_id); if (size < sizeof(int)) goto out; cpus = tep_read_number(tep, p, sizeof(int)); size -= sizeof(int); p += sizeof(int); do_print(OPTIONS, "%d [Guest CPUs]\n", cpus); for (i = 0; i < cpus; i++) { if (size < 2 * sizeof(int)) goto out; cpu = tep_read_number(tep, p, sizeof(int)); size -= sizeof(int); p += sizeof(int); pid = tep_read_number(tep, p, sizeof(int)); size -= sizeof(int); p += sizeof(int); do_print(OPTIONS, " %d %d [guest cpu, host pid]\n", cpu, pid); } out: free(buf); } void dump_option_tsc2nsec(int fd, int size) { int mult, shift; unsigned long long offset; do_print(OPTIONS, "\n\t\t[Option TSC2NSEC, %d bytes]\n", size); if (read_file_number(fd, &mult, 4)) die("cannot read tsc2nsec multiplier"); if (read_file_number(fd, &shift, 4)) die("cannot read tsc2nsec shift"); if (read_file_number(fd, &offset, 8)) die("cannot read tsc2nsec offset"); do_print(OPTIONS, "%d %d %llu [multiplier, shift, offset]\n", mult, shift, offset); } static void dump_option_section(int fd, unsigned int size, 
unsigned short id, char *desc, enum dump_items v) { struct file_section *sec; sec = calloc(1, sizeof(struct file_section)); if (!sec) die("cannot allocate new section"); sec->next = sections; sections = sec; sec->id = id; sec->verbosity = v; if (read_file_number(fd, &sec->offset, 8)) die("cannot read the option %d offset", id); do_print(OPTIONS, "\t\t[Option %s, %d bytes] @ %lld\n", desc, size, sec->offset); } static void dump_sections(int fd, int count) { struct file_section *sec = sections; unsigned short flags; while (sec) { if (lseek(fd, sec->offset, SEEK_SET) == (off_t)-1) die("cannot goto option offset %lld", sec->offset); dump_section_header(fd, sec->verbosity, &flags); if ((flags & TRACECMD_SEC_FL_COMPRESS) && uncompress_block()) die("cannot uncompress section block"); switch (sec->id) { case TRACECMD_OPTION_HEADER_INFO: dump_header_page(fd); dump_header_event(fd); break; case TRACECMD_OPTION_FTRACE_EVENTS: dump_ftrace_events_format(fd); break; case TRACECMD_OPTION_EVENT_FORMATS: dump_events_format(fd); break; case TRACECMD_OPTION_KALLSYMS: dump_kallsyms(fd); break; case TRACECMD_OPTION_PRINTK: dump_printk(fd); break; case TRACECMD_OPTION_CMDLINES: dump_cmdlines(fd); break; } uncompress_reset(); sec = sec->next; } do_print(SUMMARY|SECTIONS, "\t[%d sections]\n", count); } static int dump_options_read(int fd); static int dump_option_done(int fd, int size) { unsigned long long offset; do_print(OPTIONS, "\t\t[Option DONE, %d bytes]\n", size); if (file_version < FILE_VERSION_SECTIONS || size < 8) return 0; if (read_file_number(fd, &offset, 8)) die("cannot read the next options offset"); do_print(OPTIONS, "%lld\n", offset); if (!offset) return 0; if (lseek(fd, offset, SEEK_SET) == (off_t)-1) die("cannot goto next options offset %lld", offset); do_print(OPTIONS, "\n\n"); return dump_options_read(fd); } static int dump_options_read(int fd) { unsigned short flags = 0; unsigned short option; unsigned int size; int count = 0; if (file_version >= FILE_VERSION_SECTIONS) 
dump_section_header(fd, OPTIONS, &flags); if ((flags & TRACECMD_SEC_FL_COMPRESS) && uncompress_block()) die("cannot uncompress file block"); for (;;) { if (read_file_number(fd, &option, 2)) die("cannot read the option id"); if (option == TRACECMD_OPTION_DONE && file_version < FILE_VERSION_SECTIONS) break; if (read_file_number(fd, &size, 4)) die("cannot read the option size"); count++; switch (option) { case TRACECMD_OPTION_DATE: dump_option_string(fd, size, "DATE"); break; case TRACECMD_OPTION_CPUSTAT: dump_option_string(fd, size, "CPUSTAT"); break; case TRACECMD_OPTION_BUFFER: case TRACECMD_OPTION_BUFFER_TEXT: dump_option_buffer(fd, option, size); break; case TRACECMD_OPTION_TRACECLOCK: do_print(OPTIONS, "\t\t[Option TRACECLOCK, %d bytes]\n", size); read_dump_string(fd, size, OPTIONS | CLOCK); has_clock = 1; break; case TRACECMD_OPTION_UNAME: dump_option_string(fd, size, "UNAME"); break; case TRACECMD_OPTION_HOOK: dump_option_string(fd, size, "HOOK"); break; case TRACECMD_OPTION_OFFSET: dump_option_string(fd, size, "OFFSET"); break; case TRACECMD_OPTION_CPUCOUNT: dump_option_int(fd, size, "CPUCOUNT"); break; case TRACECMD_OPTION_VERSION: dump_option_string(fd, size, "VERSION"); break; case TRACECMD_OPTION_PROCMAPS: dump_option_string(fd, size, "PROCMAPS"); break; case TRACECMD_OPTION_TRACEID: dump_option_xlong(fd, size, "TRACEID"); break; case TRACECMD_OPTION_TIME_SHIFT: dump_option_timeshift(fd, size); break; case TRACECMD_OPTION_GUEST: dump_option_guest(fd, size); break; case TRACECMD_OPTION_TSC2NSEC: dump_option_tsc2nsec(fd, size); break; case TRACECMD_OPTION_HEADER_INFO: dump_option_section(fd, size, option, "HEADERS", HEAD_PAGE | HEAD_EVENT); break; case TRACECMD_OPTION_FTRACE_EVENTS: dump_option_section(fd, size, option, "FTRACE EVENTS", FTRACE_FORMAT); break; case TRACECMD_OPTION_EVENT_FORMATS: dump_option_section(fd, size, option, "EVENT FORMATS", EVENT_SYSTEMS | EVENT_FORMAT); break; case TRACECMD_OPTION_KALLSYMS: dump_option_section(fd, size, option, 
"KALLSYMS", KALLSYMS); break; case TRACECMD_OPTION_PRINTK: dump_option_section(fd, size, option, "PRINTK", TRACE_PRINTK); break; case TRACECMD_OPTION_CMDLINES: dump_option_section(fd, size, option, "CMDLINES", CMDLINES); break; case TRACECMD_OPTION_DONE: uncompress_reset(); count += dump_option_done(fd, size); return count; default: do_print(OPTIONS, " %d %d\t[Unknown option, size - skipping]\n", option, size); do_lseek(fd, size, SEEK_CUR); break; } } uncompress_reset(); return count; } static void dump_options(int fd) { int count; count = dump_options_read(fd); do_print(SUMMARY|OPTIONS, "\t[%d options]\n", count); } static void dump_latency(int fd) { do_print(SUMMARY, "\t[Latency tracing data]\n"); } static void dump_clock(int fd) { long long size; char *clock; do_print((SUMMARY | CLOCK), "\t[Tracing clock]\n"); if (!has_clock) { do_print((SUMMARY | CLOCK), "\t\t No tracing clock saved in the file\n"); return; } if (read_file_number(fd, &size, 8)) die("cannot read clock size"); clock = calloc(1, size); if (!clock) die("cannot allocate clock %lld bytes", size); if (read_file_bytes(fd, clock, size)) die("cannot read clock %lld bytes", size); clock[size] = 0; do_print((SUMMARY | CLOCK), "\t\t%s\n", clock); free(clock); } static void dump_flyrecord(int fd) { long long cpu_offset; long long cpu_size; int i; do_print((SUMMARY | FLYRECORD), "\t[Flyrecord tracing data]\n"); for (i = 0; i < trace_cpus; i++) { if (read_file_number(fd, &cpu_offset, 8)) die("cannot read the cpu %d offset", i); if (read_file_number(fd, &cpu_size, 8)) die("cannot read the cpu %d size", i); do_print(FLYRECORD, "\t %10.lld %10.lld\t[offset, size of cpu %d]\n", cpu_offset, cpu_size, i); } dump_clock(fd); } static void dump_therest(int fd) { char str[10]; for (;;) { if (read_file_bytes(fd, str, 10)) die("cannot read the rest of the header"); if (strncmp(str, HEAD_OPTIONS, 10) == 0) dump_options(fd); else if (strncmp(str, HEAD_LATENCY, 10) == 0) dump_latency(fd); else if (strncmp(str, 
HEAD_FLYRECORD, 10) == 0) dump_flyrecord(fd); else { lseek(fd, -10, SEEK_CUR); break; } } } static void dump_v6_file(int fd) { dump_header_page(fd); dump_header_event(fd); dump_ftrace_events_format(fd); dump_events_format(fd); dump_kallsyms(fd); dump_printk(fd); dump_cmdlines(fd); dump_cpus_count(fd); dump_therest(fd); } static int read_metadata_strings(int fd, unsigned long long size) { char *str, *strings; int psize; int ret; strings = realloc(meta_strings, meta_strings_size + size); if (!strings) return -1; meta_strings = strings; ret = read_file_bytes(fd, meta_strings + meta_strings_size, size); if (ret < 0) return -1; do_print(STRINGS, "\t[String @ offset]\n"); psize = 0; while (psize < size) { str = meta_strings + meta_strings_size + psize; do_print(STRINGS, "\t\t\"%s\" @ %d\n", str, meta_strings_size + psize); psize += strlen(str) + 1; } meta_strings_size += size; return 0; } static void get_meta_strings(int fd) { unsigned long long offset, size; unsigned int csize, rsize; unsigned short fl, id; int desc_id; offset = lseek(fd, 0, SEEK_CUR); do { if (read_file_number(fd, &id, 2)) break; if (read_file_number(fd, &fl, 2)) die("cannot read section flags"); if (read_file_number(fd, &desc_id, 4)) die("cannot read section description"); if (read_file_number(fd, &size, 8)) die("cannot read section size"); if (id == TRACECMD_OPTION_STRINGS) { if ((fl & TRACECMD_SEC_FL_COMPRESS)) { read_file_number(fd, &csize, 4); read_file_number(fd, &rsize, 4); lseek(fd, -8, SEEK_CUR); if (uncompress_block()) break; } else { rsize = size; } read_metadata_strings(fd, rsize); uncompress_reset(); } else { if (lseek(fd, size, SEEK_CUR) == (off_t)-1) break; } } while (1); if (lseek(fd, offset, SEEK_SET) == (off_t)-1) die("cannot restore the original file location"); } static int walk_v7_sections(int fd) { unsigned long long offset, soffset, size; unsigned short fl; unsigned short id; int csize, rsize; int count = 0; int desc_id; const char *desc; offset = lseek(fd, 0, SEEK_CUR); do { 
soffset = lseek(fd, 0, SEEK_CUR); if (read_file_number(fd, &id, 2)) break; if (read_file_number(fd, &fl, 2)) die("cannot read section flags"); if (read_file_number(fd, &desc_id, 4)) die("cannot read section description"); desc = get_metadata_string(desc_id); if (!desc) desc = "Unknown"; if (read_file_number(fd, &size, 8)) die("cannot read section size"); if (id >= TRACECMD_OPTION_MAX) do_print(SECTIONS, "Unknown section id %d: %s", id, desc); count++; if (fl & TRACECMD_SEC_FL_COMPRESS) { if (id == TRACECMD_OPTION_BUFFER || id == TRACECMD_OPTION_BUFFER_TEXT) { do_print(SECTIONS, "\t[Section %2d @ %-16lld\t\"%s\", flags 0x%X, " "%lld compressed bytes]\n", id, soffset, desc, fl, size); } else { if (read_file_number(fd, &csize, 4)) die("cannot read section size"); if (read_file_number(fd, &rsize, 4)) die("cannot read section size"); do_print(SECTIONS, "\t[Section %2d @ %-16lld\t\"%s\", flags 0x%X, " "%d compressed, %d uncompressed]\n", id, soffset, desc, fl, csize, rsize); size -= 8; } } else { do_print(SECTIONS, "\t[Section %2d @ %-16lld\t\"%s\", flags 0x%X, %lld bytes]\n", id, soffset, desc, fl, size); } if (lseek(fd, size, SEEK_CUR) == (off_t)-1) break; } while (1); if (lseek(fd, offset, SEEK_SET) == (off_t)-1) die("cannot restore the original file location"); return count; } static void dump_v7_file(int fd) { long long offset; int sections; if (read_file_number(fd, &offset, 8)) die("cannot read offset of the first option section"); get_meta_strings(fd); sections = walk_v7_sections(fd); if (lseek(fd, offset, SEEK_SET) == (off_t)-1) die("cannot goto options offset %lld", offset); dump_options(fd); dump_sections(fd, sections); } static void free_sections(void) { struct file_section *del; while (sections) { del = sections; sections = sections->next; free(del); } } static void dump_file(const char *file) { int fd; tep = tep_alloc(); if (!tep) return; fd = open(file, O_RDONLY); if (fd < 0) die("cannot open '%s'\n", file); do_print(SUMMARY, "\n Tracing meta data in file 
/*
 * Values returned by getopt_long() for the options of trace_dump() that
 * have no short form. They run from 240 to 255, each one being the
 * previous value plus one.
 */
enum {
	OPT_sections	= 240,
	OPT_strings,		/* 241 */
	OPT_verbose,		/* 242 */
	OPT_clock,		/* 243 */
	OPT_all,		/* 244 */
	OPT_summary,		/* 245 */
	OPT_flyrecord,		/* 246 */
	OPT_options,		/* 247 */
	OPT_cmd_lines,		/* 248 */
	OPT_printk,		/* 249 */
	OPT_kallsyms,		/* 250 */
	OPT_events,		/* 251 */
	OPT_systems,		/* 252 */
	OPT_ftrace,		/* 253 */
	OPT_head_event,		/* 254 */
	OPT_head_page,		/* 255 */
};
OPT_cmd_lines: verbosity |= CMDLINES; break; case OPT_printk: verbosity |= TRACE_PRINTK; break; case OPT_kallsyms: verbosity |= KALLSYMS; break; case OPT_events: verbosity |= EVENT_FORMAT; break; case OPT_systems: verbosity |= EVENT_SYSTEMS; break; case OPT_ftrace: verbosity |= FTRACE_FORMAT; break; case OPT_head_event: verbosity |= HEAD_EVENT; break; case OPT_head_page: verbosity |= HEAD_PAGE; break; case OPT_clock: verbosity |= CLOCK; break; case OPT_verbose: if (trace_set_verbose(optarg) < 0) die("invalid verbose level %s", optarg); break; case OPT_strings: verbosity |= STRINGS; break; case OPT_sections: verbosity |= SECTIONS; break; default: usage(argv); } } if ((argc - optind) >= 2) { if (input_file) usage(argv); input_file = argv[optind + 1]; } if (!input_file) input_file = DEFAULT_INPUT_FILE; if (!verbosity && !validate) verbosity = SUMMARY; dump_file(input_file); if (validate) tracecmd_plog("File %s is a valid trace-cmd file\n", input_file); } trace-cmd-v3.3.1/tracecmd/trace-hist.c000066400000000000000000000570451470231550600175520ustar00rootroot00000000000000// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2013 Red Hat Inc, Steven Rostedt * * Several of the ideas in this file came from Arnaldo Carvalho de Melo's * work on the perf ui. 
/* Allocate @size bytes of zero filled memory. Returns NULL on failure. */
static void *zalloc(size_t size)
{
	void *mem = calloc(1, size);

	return mem;
}
be saved */ ips = NULL; } static void save_stack(void) { struct stack_save *stack; stack = zalloc(sizeof(*stack)); if (!stack) die("malloc"); stack->pid = current_pid; stack->ips_idx = ips_idx; stack->func_depth = func_depth; stack->ips = ips; stack->next = saved_stacks; saved_stacks = stack; reset_stack(); } static void restore_stack(int pid) { struct stack_save *last = NULL, *stack; for (stack = saved_stacks; stack; last = stack, stack = stack->next) { if (stack->pid == pid) break; } if (!stack) return; if (last) last->next = stack->next; else saved_stacks = stack->next; current_pid = stack->pid; ips_idx = stack->ips_idx; func_depth = stack->func_depth; free(ips); ips = stack->ips; free(stack); } struct pid_list; struct chain { struct chain *next; struct chain *sibling; const char *func; struct chain *parents; struct pid_list *pid_list; int nr_parents; int count; int total; int event; }; static struct chain *chains; static int nr_chains; static int total_counts; struct pid_list { struct pid_list *next; struct chain chain; int pid; }; static struct pid_list *list_pids; static struct pid_list all_pid_list; static void add_chain(struct chain *chain) { if (chain->next) die("chain not null?"); chain->next = chains; chains = chain; nr_chains++; } static void insert_chain(struct pid_list *pid_list, struct chain *chain_list, const char **chain_str, int size, int event) { struct chain *chain; /* Record all counts */ if (!chain_list->func) total_counts++; chain_list->count++; if (!size--) return; for (chain = chain_list->parents; chain; chain = chain->sibling) { if (chain->func == chain_str[size]) { insert_chain(pid_list, chain, chain_str, size, 0); return; } } chain_list->nr_parents++; chain = zalloc(sizeof(struct chain)); if (!chain) die("malloc"); chain->sibling = chain_list->parents; chain_list->parents = chain; chain->func = chain_str[size]; chain->pid_list = pid_list; chain->event = event; /* NULL func means this is the top level of the chain. 
Store it */ if (!chain_list->func) add_chain(chain); insert_chain(pid_list, chain, chain_str, size, 0); } static void save_call_chain(int pid, const char **chain, int size, int event) { static struct pid_list *pid_list; if (compact) pid_list = &all_pid_list; else if (!pid_list || pid_list->pid != pid) { for (pid_list = list_pids; pid_list; pid_list = pid_list->next) { if (pid_list->pid == pid) break; } if (!pid_list) { pid_list = zalloc(sizeof(*pid_list)); if (!pid_list) die("malloc"); pid_list->pid = pid; pid_list->next = list_pids; list_pids = pid_list; } } insert_chain(pid_list, &pid_list->chain, chain, size, event); } static void save_stored_stacks(void) { while (saved_stacks) { restore_stack(saved_stacks->pid); save_call_chain(current_pid, ips, ips_idx, 0); } } static void flush_stack(void) { if (current_pid < 0) return; save_call_chain(current_pid, ips, ips_idx, 0); free(ips); reset_stack(); } static void push_stack_func(const char *func) { ips_idx++; ips = realloc(ips, ips_idx * sizeof(char *)); ips[ips_idx - 1] = func; } static void pop_stack_func(void) { ips_idx--; ips[ips_idx] = NULL; } static void process_function(struct tep_handle *pevent, struct tep_record *record) { unsigned long long parent_ip; unsigned long long ip; unsigned long long val; const char *parent; const char *func; int pid; int ret; ret = tep_read_number_field(common_pid_field, record->data, &val); if (ret < 0) die("no pid field for function?"); ret = tep_read_number_field(function_ip_field, record->data, &ip); if (ret < 0) die("no ip field for function?"); ret = tep_read_number_field(function_parent_ip_field, record->data, &parent_ip); if (ret < 0) die("no parent ip field for function?"); pid = val; func = tep_find_function(pevent, ip); parent = tep_find_function(pevent, parent_ip); if (current_pid >= 0 && pid != current_pid) { save_stack(); restore_stack(pid); } current_pid = pid; if (ips_idx) { if (ips[ips_idx - 1] == parent) push_stack_func(func); else { save_call_chain(pid, ips, 
ips_idx, 0); while (ips_idx) { pop_stack_func(); if (ips[ips_idx - 1] == parent) { push_stack_func(func); break; } } } } /* The above check can set ips_idx to zero again */ if (!ips_idx) { push_stack_func(parent); push_stack_func(func); } } static void process_function_graph_entry(struct tep_handle *pevent, struct tep_record *record) { unsigned long long depth; unsigned long long ip; unsigned long long val; const char *func; int pid; int ret; ret = tep_read_number_field(common_pid_field, record->data, &val); if (ret < 0) die("no pid field for function graph entry?"); ret = tep_read_number_field(function_graph_entry_func_field, record->data, &ip); if (ret < 0) die("no ip field for function graph entry?"); ret = tep_read_number_field(function_graph_entry_depth_field, record->data, &depth); if (ret < 0) die("no parent ip field for function entry?"); pid = val; func = tep_find_function(pevent, ip); if (current_pid >= 0 && pid != current_pid) { save_stack(); restore_stack(pid); } current_pid = pid; if (depth != ips_idx) { save_call_chain(pid, ips, ips_idx, 0); while (ips_idx > depth) pop_stack_func(); } func_depth = depth; push_stack_func(func); } static void process_function_graph_exit(struct tep_handle *pevent, struct tep_record *record) { unsigned long long depth; unsigned long long val; int pid; int ret; ret = tep_read_number_field(common_pid_field, record->data, &val); if (ret < 0) die("no pid field for function graph exit?"); ret = tep_read_number_field(function_graph_exit_depth_field, record->data, &depth); if (ret < 0) die("no parent ip field for function?"); pid = val; if (current_pid >= 0 && pid != current_pid) { save_stack(); restore_stack(pid); } current_pid = pid; if (ips_idx != depth) { save_call_chain(pid, ips, ips_idx, 0); while (ips_idx > depth) pop_stack_func(); } func_depth = depth - 1; } static int pending_pid = -1; static const char **pending_ips; static int pending_ips_idx; static void reset_pending_stack(void) { pending_pid = -1; pending_ips_idx = 
0; free(pending_ips); pending_ips = NULL; } static void copy_stack_to_pending(int pid) { pending_pid = pid; pending_ips = zalloc(sizeof(char *) * ips_idx); memcpy(pending_ips, ips, sizeof(char *) * ips_idx); pending_ips_idx = ips_idx; } static void process_kernel_stack(struct tep_handle *pevent, struct tep_record *record) { struct tep_format_field *field = kernel_stack_caller_field; unsigned long long val; void *data = record->data; int do_restore = 0; int pid; int ret; ret = tep_read_number_field(common_pid_field, record->data, &val); if (ret < 0) die("no pid field for function?"); pid = val; if (pending_pid >= 0 && pid != pending_pid) { reset_pending_stack(); return; } if (!field) die("no caller field for kernel stack?"); if (pending_pid >= 0) { if (current_pid >= 0) { save_stack(); do_restore = 1; } } else { /* function stack trace? */ if (current_pid >= 0) { copy_stack_to_pending(current_pid); free(ips); reset_stack(); } } current_pid = pid; /* Need to start at the end of the callers and work up */ for (data += field->offset; data < record->data + record->size; data += long_size) { unsigned long long addr; addr = tep_read_number(pevent, data, long_size); if ((long_size == 8 && addr == (unsigned long long)-1) || ((int)addr == -1)) break; } for (data -= long_size; data >= record->data + field->offset; data -= long_size) { unsigned long long addr; const char *func; addr = tep_read_number(pevent, data, long_size); func = tep_find_function(pevent, addr); if (func) push_stack_func(func); } if (pending_pid >= 0) { push_stack_func(pending_ips[pending_ips_idx - 1]); reset_pending_stack(); } save_call_chain(current_pid, ips, ips_idx, 1); if (do_restore) restore_stack(current_pid); } static void process_sched_wakeup(struct tep_handle *pevent, struct tep_record *record, int type) { unsigned long long val; const char *comm; int pid; int ret; if (type == sched_wakeup_type) { comm = (char *)(record->data + sched_wakeup_comm_field->offset); ret = 
tep_read_number_field(sched_wakeup_pid_field, record->data, &val); if (ret < 0) die("no pid field in sched_wakeup?"); } else { comm = (char *)(record->data + sched_wakeup_new_comm_field->offset); ret = tep_read_number_field(sched_wakeup_new_pid_field, record->data, &val); if (ret < 0) die("no pid field in sched_wakeup_new?"); } pid = val; tep_register_comm(pevent, comm, pid); } static void process_sched_switch(struct tep_handle *pevent, struct tep_record *record) { unsigned long long val; const char *comm; int pid; int ret; comm = (char *)(record->data + sched_switch_prev_field->offset); ret = tep_read_number_field(sched_switch_prev_pid_field, record->data, &val); if (ret < 0) die("no prev_pid field in sched_switch?"); pid = val; tep_register_comm(pevent, comm, pid); comm = (char *)(record->data + sched_switch_next_field->offset); ret = tep_read_number_field(sched_switch_next_pid_field, record->data, &val); if (ret < 0) die("no next_pid field in sched_switch?"); pid = val; tep_register_comm(pevent, comm, pid); } static void process_event(struct tep_handle *pevent, struct tep_record *record, int type) { struct tep_event *event; const char *event_name; unsigned long long val; int pid; int ret; if (pending_pid >= 0) { save_call_chain(pending_pid, pending_ips, pending_ips_idx, 1); reset_pending_stack(); } event = tep_find_event(pevent, type); event_name = event->name; ret = tep_read_number_field(common_pid_field, record->data, &val); if (ret < 0) die("no pid field for function?"); pid = val; /* * Even if function or function graph tracer is running, * if the user ran with stack traces on events, we want to use * that instead. But unfortunately, that stack doesn't come * until after the event. Thus, we only add the event into * the pending stack. 
*/ push_stack_func(event_name); copy_stack_to_pending(pid); pop_stack_func(); } static void process_record(struct tep_handle *pevent, struct tep_record *record) { unsigned long long val; int type; tep_read_number_field(common_type_hist, record->data, &val); type = val; if (type == function_type) return process_function(pevent, record); if (type == function_graph_entry_type) return process_function_graph_entry(pevent, record); if (type == function_graph_exit_type) return process_function_graph_exit(pevent, record); if (type == kernel_stack_type) return process_kernel_stack(pevent, record); if (type == sched_wakeup_type || type == sched_wakeup_new_type) process_sched_wakeup(pevent, record, type); else if (type == sched_switch_type) process_sched_switch(pevent, record); process_event(pevent, record, type); } static struct tep_event * update_event(struct tep_handle *pevent, const char *sys, const char *name, int *id) { struct tep_event *event; event = tep_find_event_by_name(pevent, sys, name); if (!event) return NULL; *id = event->id; return event; } static void update_sched_wakeup(struct tep_handle *pevent) { struct tep_event *event; event = update_event(pevent, "sched", "sched_wakeup", &sched_wakeup_type); if (!event) return; sched_wakeup_comm_field = tep_find_field(event, "comm"); sched_wakeup_pid_field = tep_find_field(event, "pid"); } static void update_sched_wakeup_new(struct tep_handle *pevent) { struct tep_event *event; event = update_event(pevent, "sched", "sched_wakeup_new", &sched_wakeup_new_type); if (!event) return; sched_wakeup_new_comm_field = tep_find_field(event, "comm"); sched_wakeup_new_pid_field = tep_find_field(event, "pid"); } static void update_sched_switch(struct tep_handle *pevent) { struct tep_event *event; event = update_event(pevent, "sched", "sched_switch", &sched_switch_type); if (!event) return; sched_switch_prev_field = tep_find_field(event, "prev_comm"); sched_switch_next_field = tep_find_field(event, "next_comm"); 
sched_switch_prev_pid_field = tep_find_field(event, "prev_pid"); sched_switch_next_pid_field = tep_find_field(event, "next_pid"); } static void update_function(struct tep_handle *pevent) { struct tep_event *event; event = update_event(pevent, "ftrace", "function", &function_type); if (!event) return; function_ip_field = tep_find_field(event, "ip"); function_parent_ip_field = tep_find_field(event, "parent_ip"); } static void update_function_graph_entry(struct tep_handle *pevent) { struct tep_event *event; event = update_event(pevent, "ftrace", "funcgraph_entry", &function_graph_entry_type); if (!event) return; function_graph_entry_func_field = tep_find_field(event, "func"); function_graph_entry_depth_field = tep_find_field(event, "depth"); } static void update_function_graph_exit(struct tep_handle *pevent) { struct tep_event *event; event = update_event(pevent, "ftrace", "funcgraph_exit", &function_graph_exit_type); if (!event) return; function_graph_exit_func_field = tep_find_field(event, "func"); function_graph_exit_depth_field = tep_find_field(event, "depth"); function_graph_exit_calltime_field = tep_find_field(event, "calltime"); function_graph_exit_rettime_field = tep_find_field(event, "rettime"); function_graph_exit_overrun_field = tep_find_field(event, "overrun"); } static void update_kernel_stack(struct tep_handle *pevent) { struct tep_event *event; event = update_event(pevent, "ftrace", "kernel_stack", &kernel_stack_type); if (!event) return; kernel_stack_caller_field = tep_find_field(event, "caller"); } enum field { NEXT_PTR, SIB_PTR }; static struct chain *next_ptr(struct chain *chain, enum field field) { if (field == NEXT_PTR) return chain->next; return chain->sibling; } static struct chain *split_chain(struct chain *orig, int size, enum field field) { struct chain *chain; int i; if (size < 2) return NULL; for (i = 1; i < (size + 1) / 2; i++, orig = next_ptr(orig, field)) ; if (field == NEXT_PTR) { chain = orig->next; orig->next = NULL; } else { chain = 
orig->sibling; orig->sibling = NULL; } return chain; } static struct chain * merge_chains(struct chain *a, int nr_a, struct chain *b, int nr_b, enum field field) { struct chain *chain; struct chain *final; struct chain **next = &final; int i; if (!a) return b; if (!b) return a; for (i = 0, chain = a; chain; i++, chain = next_ptr(chain, field)) ; if (i != nr_a) die("WTF %d %d", i, nr_a); chain = split_chain(a, nr_a, field); a = merge_chains(chain, nr_a / 2, a, (nr_a + 1) / 2, field); chain = split_chain(b, nr_b, field); b = merge_chains(chain, nr_b / 2, b, (nr_b + 1) / 2, field); while (a && b) { if (a->count > b->count) { *next = a; if (field == NEXT_PTR) next = &a->next; else next = &a->sibling; a = *next; *next = NULL; } else { *next = b; if (field == NEXT_PTR) next = &b->next; else next = &b->sibling; b = *next; *next = NULL; } } if (a) *next = a; else *next = b; return final; } static void sort_chain_parents(struct chain *chain) { struct chain *parent; parent = split_chain(chain->parents, chain->nr_parents, SIB_PTR); chain->parents = merge_chains(parent, chain->nr_parents / 2, chain->parents, (chain->nr_parents + 1) / 2, SIB_PTR); for (chain = chain->parents; chain; chain = chain->sibling) sort_chain_parents(chain); } static void sort_chains(void) { struct chain *chain; chain = split_chain(chains, nr_chains, NEXT_PTR); /* The original always has more or equal to the split */ chains = merge_chains(chain, nr_chains / 2, chains, (nr_chains + 1) / 2, NEXT_PTR); for (chain = chains; chain; chain = chain->next) sort_chain_parents(chain); } static double get_percent(int total, int partial) { return ((double)partial / (double)total) * 100.0; } static int single_chain(struct chain *chain) { if (chain->nr_parents > 1) return 0; if (!chain->parents) return 1; return single_chain(chain->parents); } #define START " |\n" #define TICK " --- " #define BLANK " " #define LINE " |" #define INDENT " " unsigned long long line_mask; void make_indent(int indent) { int i; for (i = 0; 
i < indent; i++) { if (line_mask & (1 << i)) printf(LINE); else printf(INDENT); } } static void print_single_parent(struct chain *chain, int indent) { make_indent(indent); printf(BLANK); printf("%s\n", chain->parents->func); } static void dump_chain(struct tep_handle *pevent, struct chain *chain, int indent) { if (!chain->parents) return; print_single_parent(chain, indent); dump_chain(pevent, chain->parents, indent); } static void print_parents(struct tep_handle *pevent, struct chain *chain, int indent) { struct chain *parent = chain->parents; int x; if (single_chain(chain)) { dump_chain(pevent, chain, indent); return; } line_mask |= 1ULL << (indent); for (x = 0; parent; x++, parent = parent->sibling) { struct chain *save_parent; make_indent(indent + 1); printf("\n"); make_indent(indent + 1); printf("--%%%.2f-- %s # %d\n", get_percent(chain->count, parent->count), parent->func, parent->count); if (x == chain->nr_parents - 1) line_mask &= (1ULL << indent) - 1; if (single_chain(parent)) dump_chain(pevent, parent, indent + 1); else { save_parent = parent; while (parent && parent->parents && parent->nr_parents < 2 && parent->parents->count == parent->count) { print_single_parent(parent, indent + 1); parent = parent->parents; } if (parent) print_parents(pevent, parent, indent + 1); parent = save_parent; } } } static void print_chains(struct tep_handle *pevent) { struct chain *chain = chains; int pid; for (; chain; chain = chain->next) { pid = chain->pid_list->pid; if (chain != chains) printf("\n"); if (compact) printf(" %%%3.2f %30s #%d\n", get_percent(total_counts, chain->count), chain->func, chain->count); else printf(" %%%3.2f (%d) %s %30s #%d\n", get_percent(total_counts, chain->count), pid, tep_data_comm_from_pid(pevent, pid), chain->func, chain->count); printf(START); if (chain->event) printf(TICK "*%s*\n", chain->func); else printf(TICK "%s\n", chain->func); print_parents(pevent, chain, 0); } } static void do_trace_hist(struct tracecmd_input *handle) { struct 
tep_handle *pevent = tracecmd_get_tep(handle); struct tep_record *record; struct tep_event *event; int cpus; int cpu; int ret; cpus = tracecmd_cpus(handle); /* Need to get any event */ for (cpu = 0; cpu < cpus; cpu++) { record = tracecmd_peek_data(handle, cpu); if (record) break; } if (!record) die("No records found in file"); ret = tep_data_type(pevent, record); event = tep_find_event(pevent, ret); long_size = tracecmd_long_size(handle); common_type_hist = tep_find_common_field(event, "common_type"); if (!common_type_hist) die("Can't find a 'type' field?"); common_pid_field = tep_find_common_field(event, "common_pid"); if (!common_pid_field) die("Can't find a 'pid' field?"); update_sched_wakeup(pevent); update_sched_wakeup_new(pevent); update_sched_switch(pevent); update_function(pevent); update_function_graph_entry(pevent); update_function_graph_exit(pevent); update_kernel_stack(pevent); for (cpu = 0; cpu < cpus; cpu++) { for (;;) { struct tep_record *record; record = tracecmd_read_data(handle, cpu); if (!record) break; /* If we missed events, just flush out the current stack */ if (record->missed_events) flush_stack(); process_record(pevent, record); tracecmd_free_record(record); } } if (current_pid >= 0) save_call_chain(current_pid, ips, ips_idx, 0); if (pending_pid >= 0) save_call_chain(pending_pid, pending_ips, pending_ips_idx, 1); save_stored_stacks(); sort_chains(); print_chains(pevent); } void trace_hist(int argc, char **argv) { struct tracecmd_input *handle; const char *input_file = NULL; int instances; int ret; for (;;) { int c; c = getopt(argc-1, argv+1, "+hi:P"); if (c == -1) break; switch (c) { case 'h': usage(argv); break; case 'i': if (input_file) die("Only one input for historgram"); input_file = optarg; break; case 'P': compact = 1; break; default: usage(argv); } } if ((argc - optind) >= 2) { if (input_file) usage(argv); input_file = argv[optind + 1]; } if (!input_file) input_file = DEFAULT_INPUT_FILE; handle = tracecmd_alloc(input_file, 0); if 
(!handle) die("can't open %s\n", input_file); ret = tracecmd_read_headers(handle, 0); if (ret) { tracecmd_close(handle); return; } ret = tracecmd_init_data(handle); if (ret < 0) die("failed to init data"); if (ret > 0) die("trace-cmd hist does not work with latency traces\n"); instances = tracecmd_buffer_instances(handle); if (instances) { struct tracecmd_input *new_handle; int i; for (i = 0; i < instances; i++) { new_handle = tracecmd_buffer_instance_handle(handle, i); if (!new_handle) { warning("could not retrieve handle %d", i); continue; } do_trace_hist(new_handle); tracecmd_close(new_handle); } } else { do_trace_hist(handle); } tracecmd_close(handle); } trace-cmd-v3.3.1/tracecmd/trace-list.c000066400000000000000000000335511470231550600175520ustar00rootroot00000000000000// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt * */ #include #include #include "tracefs.h" #include "trace-local.h" static void dump_file_content(const char *path) { char buf[BUFSIZ]; ssize_t n; FILE *fp; fp = fopen(path, "r"); if (!fp) die("reading %s", path); do { n = fread(buf, 1, BUFSIZ, fp); if (n > 0) fwrite(buf, 1, n, stdout); } while (n > 0); fclose(fp); } void show_instance_file(struct buffer_instance *instance, const char *name) { char *path; path = tracefs_instance_get_file(instance->tracefs, name); dump_file_content(path); tracefs_put_tracing_file(path); } enum { SHOW_EVENT_FORMAT = 1 << 0, SHOW_EVENT_FILTER = 1 << 1, SHOW_EVENT_TRIGGER = 1 << 2, SHOW_EVENT_FULL = 1 << 3, }; void show_file(const char *name) { char *path; path = tracefs_get_tracing_file(name); dump_file_content(path); tracefs_put_tracing_file(path); } typedef int (*process_file_func)(char *buf, int len, int flags); static void process_file_re(process_file_func func, const char *name, const char *re, int flags) { regex_t reg; char *path; char *buf = NULL; char *str; FILE *fp; ssize_t n; size_t l = strlen(re); /* Just in case :-p */ if (!re || l == 0) { show_file(name); 
return; } /* Handle the newline at end of names for the user */ str = malloc(l + 3); if (!str) die("Failed to allocate reg ex %s", re); strcpy(str, re); if (re[l-1] == '$') strcpy(&str[l-1], "\n*$"); if (regcomp(®, str, REG_ICASE|REG_NOSUB)) die("invalid function regex '%s'", re); free(str); path = tracefs_get_tracing_file(name); fp = fopen(path, "r"); if (!fp) die("reading %s", path); tracefs_put_tracing_file(path); do { n = getline(&buf, &l, fp); if (n > 0 && regexec(®, buf, 0, NULL, 0) == 0) func(buf, n, flags); } while (n > 0); free(buf); fclose(fp); regfree(®); } static void show_event(process_file_func func, const char *system, const char *event, int flags) { char *buf; int ret; ret = asprintf(&buf, "%s:%s", system, event); if (ret < 0) die("Can not allocate event"); func(buf, strlen(buf), flags); free(buf); } static void show_system(process_file_func func, const char *system, int flags) { char **events; int e; events = tracefs_system_events(NULL, system); if (!events) /* die? */ return; for (e = 0; events[e]; e++) show_event(func, system, events[e], flags); } static void show_event_systems(process_file_func func, char **systems, int flags) { int s; for (s = 0; systems[s]; s++) show_system(func, systems[s], flags); } static void match_system_events(process_file_func func, const char *system, regex_t *reg, int flags) { char **events; int e; events = tracefs_system_events(NULL, system); if (!events) /* die? 
*/ return; for (e = 0; events[e]; e++) { if (regexec(reg, events[e], 0, NULL, 0) == 0) show_event(func, system, events[e], flags); } tracefs_list_free(events); } static void process_events(process_file_func func, const char *re, int flags) { const char *ftrace = "ftrace"; regex_t system_reg; regex_t event_reg; char *str; size_t l = strlen(re); bool just_systems = true; char **systems; char *system; char *event; int s; systems = tracefs_event_systems(NULL); if (!systems) return process_file_re(func, "available_events", re, flags); if (!re || l == 0) { show_event_systems(func, systems, flags); return; } str = strdup(re); if (!str) die("Can not allocate momory for regex"); system = strtok(str, ":"); event = strtok(NULL, ""); if (regcomp(&system_reg, system, REG_ICASE|REG_NOSUB)) die("invalid regex '%s'", system); if (event) { if (regcomp(&event_reg, event, REG_ICASE|REG_NOSUB)) die("invalid regex '%s'", event); } else { /* * If the regex ends with ":", then event would be null, * but we do not want to match events. */ if (re[l-1] != ':') just_systems = false; } free(str); /* * See if this matches the special ftrace system, as ftrace is not included * in the systems list, but can get events from tracefs_system_events(). 
*/ if (regexec(&system_reg, ftrace, 0, NULL, 0) == 0) { if (!event) show_system(func, ftrace, flags); else match_system_events(func, ftrace, &event_reg, flags); } else if (!just_systems) { match_system_events(func, ftrace, &system_reg, flags); } for (s = 0; systems[s]; s++) { if (regexec(&system_reg, systems[s], 0, NULL, 0) == 0) { if (!event) { show_system(func, systems[s], flags); continue; } match_system_events(func, systems[s], &event_reg, flags); continue; } if (just_systems) continue; match_system_events(func, systems[s], &system_reg, flags); } tracefs_list_free(systems); regfree(&system_reg); if (event) regfree(&event_reg); } static int show_file_write(char *buf, int len, int flags) { return fwrite(buf, 1, len, stdout); } static void show_file_re(const char *name, const char *re) { process_file_re(show_file_write, name, re, 0); } static char *get_event_file(const char *type, char *buf, int len) { char *system; char *event; char *path; char *file; int ret; if (buf[len-1] == '\n') buf[len-1] = '\0'; system = strtok(buf, ":"); if (!system) die("no system found in %s", buf); event = strtok(NULL, ":"); if (!event) die("no event found in %s\n", buf); path = tracefs_get_tracing_file("events"); ret = asprintf(&file, "%s/%s/%s/%s", path, system, event, type); if (ret < 0) die("Failed to allocate event file %s %s", system, event); tracefs_put_tracing_file(path); return file; } static int event_filter_write(char *buf, int len, int flags) { char *file; if (buf[len-1] == '\n') buf[len-1] = '\0'; printf("%s\n", buf); file = get_event_file("filter", buf, len); dump_file_content(file); free(file); printf("\n"); return 0; } static int event_trigger_write(char *buf, int len, int flags) { char *file; if (buf[len-1] == '\n') buf[len-1] = '\0'; printf("%s\n", buf); file = get_event_file("trigger", buf, len); dump_file_content(file); free(file); printf("\n"); return 0; } static int event_format_write(char *fbuf, int len, int flags) { char *file = get_event_file("format", fbuf, 
len); char *buf = NULL; size_t l; FILE *fp; bool full; int n; full = flags & SHOW_EVENT_FULL; /* The get_event_file() crops system in fbuf */ printf("system: %s\n", fbuf); /* Don't print the print fmt, it's ugly */ fp = fopen(file, "r"); if (!fp) die("reading %s", file); do { n = getline(&buf, &l, fp); if (n > 0) { if (!full && strncmp(buf, "print fmt", 9) == 0) break; fwrite(buf, 1, n, stdout); } } while (n > 0); fclose(fp); free(buf); free(file); return 0; } static int event_name(char *buf, int len, int flags) { printf("%s\n", buf); return 0; } static void show_event_filter_re(const char *re) { process_events(event_filter_write, re, 0); } static void show_event_trigger_re(const char *re) { process_events(event_trigger_write, re, 0); } static void show_event_format_re(const char *re, int flags) { process_events(event_format_write, re, flags); } static void show_event_names_re(const char *re) { process_events(event_name, re, 0); } static void show_events(const char *eventre, int flags) { if (flags && !eventre) die("When specifying event files, an event must be named"); if (eventre) { if (flags & SHOW_EVENT_FORMAT) show_event_format_re(eventre, flags); else if (flags & SHOW_EVENT_FILTER) show_event_filter_re(eventre); else if (flags & SHOW_EVENT_TRIGGER) show_event_trigger_re(eventre); else show_event_names_re(eventre); } else show_file("available_events"); } static void show_tracers(void) { show_file("available_tracers"); } void show_options(const char *prefix, struct buffer_instance *buffer) { struct tracefs_instance *instance = buffer ? 
buffer->tracefs : NULL; struct dirent *dent; struct stat st; char *path; DIR *dir; if (!prefix) prefix = ""; path = tracefs_instance_get_file(instance, "options"); if (!path) goto show_file; if (stat(path, &st) < 0) goto show_file; if ((st.st_mode & S_IFMT) != S_IFDIR) goto show_file; dir = opendir(path); if (!dir) die("Can not read instance directory"); while ((dent = readdir(dir))) { const char *name = dent->d_name; long long val; char *file; int ret; if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0) continue; ret = asprintf(&file, "options/%s", name); if (ret < 0) die("Failed to allocate file name"); ret = tracefs_instance_file_read_number(instance, file, &val); if (!ret) { if (val) printf("%s%s\n", prefix, name); else printf("%sno%s\n", prefix, name); } free(file); } closedir(dir); tracefs_put_tracing_file(path); return; show_file: tracefs_put_tracing_file(path); show_file("trace_options"); } static void show_clocks(void) { char *clocks; int size; clocks = tracefs_instance_file_read(NULL, "trace_clock", &size); if (!clocks) die("getting clocks"); if (clocks[size - 1] == '\n') clocks[size - 1] = 0; if (trace_tsc2nsec_is_supported()) printf("%s %s\n", clocks, TSCNSEC_CLOCK); else printf("%s\n", clocks); free(clocks); } static void show_functions(const char *funcre) { if (funcre) show_file_re("available_filter_functions", funcre); else show_file("available_filter_functions"); } static void show_buffers(void) { struct dirent *dent; DIR *dir; char *path; int printed = 0; path = tracefs_get_tracing_file("instances"); dir = opendir(path); tracefs_put_tracing_file(path); if (!dir) die("Can not read instance directory"); while ((dent = readdir(dir))) { const char *name = dent->d_name; if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0) continue; printf("%s\n", name); printed = 1; } closedir(dir); if (!printed) printf("No buffer instances defined\n"); } static void show_systems(void) { struct dirent *dent; char *path; DIR *dir; path = 
tracefs_get_tracing_file("events"); dir = opendir(path); if (!dir) die("Can not read events directory"); while ((dent = readdir(dir))) { const char *name = dent->d_name; struct stat st; char *spath; int ret; if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0) continue; if (asprintf(&spath, "%s/%s", path, name) < 0) continue; ret = stat(spath, &st); if (!ret && S_ISDIR(st.st_mode)) printf("%s\n", name); free(spath); } printf("\n"); closedir(dir); tracefs_put_tracing_file(path); } static void show_plugin_options(void) { struct tep_handle *pevent; struct tep_plugin_list *list; struct trace_seq s; tracecmd_ftrace_load_options(); pevent = tep_alloc(); if (!pevent) die("Can not allocate pevent\n"); trace_seq_init(&s); list = trace_load_plugins(pevent, 0); tep_plugin_print_options(&s); trace_seq_do_printf(&s); tep_unload_plugins(list, pevent); tep_free(pevent); } void trace_option(int argc, char **argv) { show_plugin_options(); } static void show_plugins(void) { struct tep_handle *pevent; struct tep_plugin_list *list; struct trace_seq s; pevent = tep_alloc(); if (!pevent) die("Can not allocate pevent\n"); trace_seq_init(&s); list = trace_load_plugins(pevent, 0); tep_print_plugins(&s, " ", "\n", list); trace_seq_do_printf(&s); tep_unload_plugins(list, pevent); tep_free(pevent); } static void show_compression(void) { char **versions, **names; int c, i; c = tracecmd_compress_protos_get(&names, &versions); if (c <= 0) { printf("No compression algorithms are supported\n"); return; } printf("Supported compression algorithms:\n"); for (i = 0; i < c; i++) printf("\t%s, %s\n", names[i], versions[i]); free(names); free(versions); } void trace_list(int argc, char **argv) { int events = 0; int tracer = 0; int options = 0; int funcs = 0; int buffers = 0; int clocks = 0; int plug = 0; int plug_op = 0; int flags = 0; int systems = 0; int show_all = 1; int compression = 0; int i; const char *arg; const char *funcre = NULL; const char *eventre = NULL; for (i = 2; i < argc; i++) { arg = 
NULL; if (argv[i][0] == '-') { if (i < argc - 1) { if (argv[i+1][0] != '-') arg = argv[i+1]; } switch (argv[i][1]) { case 'h': usage(argv); break; case 'e': events = 1; eventre = arg; show_all = 0; break; case 'B': buffers = 1; show_all = 0; break; case 'C': clocks = 1; show_all = 0; break; case 'F': flags |= SHOW_EVENT_FORMAT; break; case 'R': flags |= SHOW_EVENT_TRIGGER; break; case 'l': flags |= SHOW_EVENT_FILTER; break; case 'p': case 't': tracer = 1; show_all = 0; break; case 'P': plug = 1; show_all = 0; break; case 'O': plug_op = 1; show_all = 0; break; case 'o': options = 1; show_all = 0; break; case 'f': funcs = 1; funcre = arg; show_all = 0; break; case 's': systems = 1; show_all = 0; break; case 'c': compression = 1; show_all = 0; break; case '-': if (strcmp(argv[i], "--debug") == 0) { tracecmd_set_debug(true); break; } if (strcmp(argv[i], "--full") == 0) { flags |= SHOW_EVENT_FULL; break; } fprintf(stderr, "list: invalid option -- '%s'\n", argv[i]); default: fprintf(stderr, "list: invalid option -- '%c'\n", argv[i][1]); usage(argv); } } } if (events) show_events(eventre, flags); if (tracer) show_tracers(); if (options) show_options(NULL, NULL); if (plug) show_plugins(); if (plug_op) show_plugin_options(); if (funcs) show_functions(funcre); if (buffers) show_buffers(); if (clocks) show_clocks(); if (systems) show_systems(); if (compression) show_compression(); if (show_all) { printf("event systems:\n"); show_systems(); printf("events:\n"); show_events(NULL, 0); printf("\ntracers:\n"); show_tracers(); printf("\noptions:\n"); show_options(NULL, NULL); show_compression(); } return; } trace-cmd-v3.3.1/tracecmd/trace-listen.c000066400000000000000000000556631470231550600201050ustar00rootroot00000000000000// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef VSOCK #include #endif 
#include "trace-local.h"
#include "trace-msg.h"

#define dprint(fmt, ...)	tracecmd_debug(fmt, ##__VA_ARGS__)

#define MAX_OPTION_SIZE 4096

/* Turn the VAR_DIR macro into a string literal */
#define _VAR_DIR_Q(dir)		#dir
#define VAR_DIR_Q(dir)		_VAR_DIR_Q(dir)

#define VAR_RUN_DIR		VAR_DIR_Q(VAR_DIR) "/run"

#define LISTEN_PIDFILE		"trace-cmd-net.pid"

static char *default_output_dir = ".";
static char *output_dir;
static char *default_output_file = "trace";
static char *output_file;

static bool use_vsock;

static int backlog = 5;

static int do_daemon;

/* Used for signaling INT to finish */
static struct tracecmd_msg_handle *stop_msg_handle;
static bool done;

/* Log the error, remove the pid file when running as daemon, and exit */
#define pdie(fmt, ...)					\
	do {						\
		tracecmd_plog_error(fmt, ##__VA_ARGS__);\
		if (do_daemon)				\
			remove_pid_file(LISTEN_PIDFILE);\
		exit(-1);				\
	} while (0)

/* Format and arguments of a per CPU temp file name; expects host, port and cpu in scope */
#define TEMP_FILE_STR "%s.%s:%s.cpu%d", output_file, host, port, cpu

/*
 * Return the allocated name of the temp file for @host, @port and @cpu,
 * or NULL on allocation failure. Release it with put_temp_file().
 */
static char *get_temp_file(const char *host, const char *port, int cpu)
{
	char *file = NULL;
	int size;

	/* A zero sized snprintf() only computes the needed length */
	size = snprintf(file, 0, TEMP_FILE_STR);
	file = malloc(size + 1);
	if (!file)
		return NULL;
	sprintf(file, TEMP_FILE_STR);

	return file;
}

/* Free a name returned by get_temp_file() */
static void put_temp_file(char *file)
{
	free(file);
}

/* Install @handle for @sig without SA_RESTART, so system calls get interrupted */
static void signal_setup(int sig, sighandler_t handle)
{
	struct sigaction action;

	sigaction(sig, NULL, &action);
	/* Make accept return EINTR */
	action.sa_flags &= ~SA_RESTART;
	action.sa_handler = handle;
	sigaction(sig, &action, NULL);
}

/* Unlink the temp file of @host, @port and @cpu */
static void delete_temp_file(const char *host, const char *port, int cpu)
{
	char file[PATH_MAX];

	snprintf(file, PATH_MAX, TEMP_FILE_STR);
	unlink(file);
}

/*
 * Read a '\0' terminated string from @fd, one byte at a time.
 *
 * @buf:  destination buffer
 * @size: size of @buf
 *
 * Returns the number of bytes stored before the terminator. A return of
 * @size means that no terminator was found within @size bytes. When the
 * read fails or hits end of file, the string is terminated at that point
 * so that the caller never parses stale buffer content.
 */
static int read_string(int fd, char *buf, size_t size)
{
	size_t i;
	int n;

	for (i = 0; i < size; i++) {
		n = read(fd, buf+i, 1);
		/* buf[i] is only valid when the read stored a byte */
		if (n <= 0) {
			buf[i] = '\0';
			break;
		}
		if (!buf[i])
			break;
	}

	return i;
}

/*
 * Apply a client supplied @option to @msg_handle.
 * Returns 1 when the option is understood, 0 when it is not.
 */
static int process_option(struct tracecmd_msg_handle *msg_handle, char *option)
{
	/* currently the only option we have is to use TCP */
	if (strcmp(option, "TCP") == 0) {
		msg_handle->flags |= TRACECMD_MSG_FL_USE_TCP;
		return 1;
	}
	return 0;
}

static void finish(int sig)
{
	if (stop_msg_handle)
tracecmd_msg_set_done(stop_msg_handle); done = true; } void make_pid_name(char *buf, const char *pidfile_basename) { snprintf(buf, PATH_MAX, VAR_RUN_DIR "/%s", pidfile_basename); } void remove_pid_file(const char *pidfile_basename) { char buf[PATH_MAX]; make_pid_name(buf, pidfile_basename); unlink(buf); } static int process_child(int sfd, const char *host, const char *port, int cpu, int page_size, enum port_type type) { struct sockaddr_storage peer_addr; #ifdef VSOCK struct sockaddr_vm vm_addr; #endif struct sockaddr *addr; socklen_t addr_len; char buf[page_size]; char *tempfile; int left; int cfd; int fd; int r, w; int once = 0; signal_setup(SIGUSR1, finish); tempfile = get_temp_file(host, port, cpu); if (!tempfile) return -ENOMEM; fd = open(tempfile, O_WRONLY | O_TRUNC | O_CREAT, 0644); if (fd < 0) pdie("creating %s", tempfile); if (type == USE_TCP) { addr = (struct sockaddr *)&peer_addr; addr_len = sizeof(peer_addr); #ifdef VSOCK } else if (type == USE_VSOCK) { addr = (struct sockaddr *)&vm_addr; addr_len = sizeof(vm_addr); #endif } if (type == USE_TCP || type == USE_VSOCK) { if (listen(sfd, backlog) < 0) pdie("listen"); cfd = accept(sfd, addr, &addr_len); if (cfd < 0 && errno == EINTR) goto done; if (cfd < 0) pdie("accept"); close(sfd); sfd = cfd; } for (;;) { /* TODO, make this copyless! 
*/
		r = read(sfd, buf, page_size);
		if (r < 0) {
			if (errno == EINTR)
				break;
			pdie("reading pages from client");
		}
		if (!r)
			break;
		/* UDP requires that we get the full size in one go */
		if (type == USE_UDP && r < page_size && !once) {
			once = 1;
			warning("read %d bytes, expected %d", r, page_size);
		}

		/* Write out everything that was read, retrying short writes */
		left = r;
		do {
			w = write(fd, buf + (r - left), left);
			if (w > 0) {
				left -= w;
			} else if (w < 0) {
				/* An interrupted write is simply retried */
				if (errno == EINTR)
					continue;
				/* Do not silently drop trace data */
				pdie("writing to %s", tempfile);
			}
		} while (left);
	}

 done:
	put_temp_file(tempfile);
	exit(0);
}

/*
 * Create a vsocket for @start_port and store it in @sfd.
 * Returns @start_port, or -errno when the socket could not be made.
 */
static int setup_vsock_port(int start_port, int *sfd)
{
	int sd;

	sd = trace_vsock_make(start_port);
	if (sd < 0)
		return -errno;
	*sfd = sd;

	return start_port;
}

/*
 * Create a socket bound to @port on any local address.
 *
 * @type selects a stream socket (USE_TCP) or a datagram socket (USE_UDP);
 * any other type is rejected. Returns the socket, or -1 when the type is
 * not supported or no address could be bound. A getaddrinfo() failure is
 * fatal.
 */
int trace_net_make(int port, enum port_type type)
{
	struct addrinfo hints;
	struct addrinfo *result, *rp;
	char buf[BUFSIZ];
	int sd;
	int s;

	snprintf(buf, BUFSIZ, "%d", port);

	memset(&hints, 0, sizeof(hints));
	hints.ai_family = AF_UNSPEC;
	hints.ai_flags = AI_PASSIVE;

	switch (type) {
	case USE_TCP:
		hints.ai_socktype = SOCK_STREAM;
		break;
	case USE_UDP:
		hints.ai_socktype = SOCK_DGRAM;
		break;
	default:
		return -1;
	}

	s = getaddrinfo(NULL, buf, &hints, &result);
	if (s != 0)
		pdie("getaddrinfo: error opening socket");

	/* Use the first address that a socket can be created for and bound to */
	for (rp = result; rp != NULL; rp = rp->ai_next) {
		sd = socket(rp->ai_family, rp->ai_socktype, rp->ai_protocol);
		if (sd < 0)
			continue;

		set_tcp_no_delay(sd, rp->ai_socktype);
		if (bind(sd, rp->ai_addr, rp->ai_addrlen) == 0)
			break;

		close(sd);
	}
	freeaddrinfo(result);

	if (rp == NULL)
		return -1;

	dprint("Create listen port: %d fd:%d\n", port, sd);

	return sd;
}

/*
 * Find a port to listen on, starting at @start_port, and store its socket
 * in @sfd. For vsockets only @start_port itself is tried. Otherwise the
 * port number is incremented until a socket can be made; going past
 * MAX_PORT_SEARCH is fatal. Returns the port number that was used.
 */
int trace_net_search(int start_port, int *sfd, enum port_type type)
{
	int num_port = start_port;

	if (type == USE_VSOCK)
		return setup_vsock_port(start_port, sfd);
 again:
	*sfd = trace_net_make(num_port, type);
	if (*sfd < 0) {
		if (++num_port > MAX_PORT_SEARCH)
			pdie("No available ports to bind");
		goto again;
	}

	return num_port;
}

static void fork_reader(int sfd, const char *node, const char *port,
			int *pid, int cpu, int pagesize, enum port_type type)
{
	int ret;

	*pid = fork();

	if (*pid < 0)
		pdie("creating reader");

	if (!*pid) {
		ret =
process_child(sfd, node, port, cpu, pagesize, type); if (ret < 0) pdie("Problem with reader %d", ret); } close(sfd); } static int open_port(const char *node, const char *port, int *pid, int cpu, int pagesize, int start_port, enum port_type type) { int sfd; int num_port; /* * trace_net_search() currently does not return an error, but if that * changes in the future, we have a check for it now. */ num_port = trace_net_search(start_port, &sfd, type); if (num_port < 0) return num_port; fork_reader(sfd, node, port, pid, cpu, pagesize, type); return num_port; } static int communicate_with_client(struct tracecmd_msg_handle *msg_handle) { char *last_proto = NULL; char buf[BUFSIZ]; char *option; int pagesize = 0; int options; int size; int cpus; int n, s, t, i; int ret = -EINVAL; int fd = msg_handle->fd; /* Let the client know what we are */ write(fd, "tracecmd", 8); try_again: /* read back the CPU count */ n = read_string(fd, buf, BUFSIZ); if (n == BUFSIZ) /** ERROR **/ return -EINVAL; cpus = atoi(buf); /* Is the client using the new protocol? */ if (cpus == -1) { if (memcmp(buf, V3_CPU, n) != 0) { /* If it did not send a version, then bail */ if (memcmp(buf, "-1V", 3)) { tracecmd_plog("Unknown string %s\n", buf); goto out; } /* Skip "-1" */ tracecmd_plog("Cannot handle the protocol %s\n", buf+2); /* If it returned the same command as last time, bail! */ if (last_proto && strncmp(last_proto, buf, n) == 0) { tracecmd_plog("Repeat of version %s sent\n", last_proto); goto out; } free(last_proto); last_proto = malloc(n + 1); if (last_proto) { memcpy(last_proto, buf, n); last_proto[n] = 0; } /* Return the highest protocol we can use */ write(fd, "V3", 3); goto try_again; } /* Let the client know we use v3 protocol */ write(fd, "V3", 3); /* read the rest of dummy data */ n = read(fd, buf, sizeof(V3_MAGIC)); if (memcmp(buf, V3_MAGIC, n) != 0) goto out; /* We're off! 
*/
		write(fd, "OK", 2);

		msg_handle->version = V3_PROTOCOL;

		/* read the CPU count, the page size, and options */
		if ((pagesize = tracecmd_msg_initial_setting(msg_handle)) < 0)
			goto out;
	} else {
		/* The client is using the v1 protocol */

		tracecmd_plog("cpus=%d\n", cpus);
		if (cpus < 0)
			goto out;

		msg_handle->cpu_count = cpus;

		/* next read the page size */
		n = read_string(fd, buf, BUFSIZ);
		if (n == BUFSIZ)
			/** ERROR **/
			goto out;

		pagesize = atoi(buf);

		tracecmd_plog("pagesize=%d\n", pagesize);
		if (pagesize <= 0)
			goto out;

		/* Now the number of options */
		n = read_string(fd, buf, BUFSIZ);
		if (n == BUFSIZ)
			/** ERROR **/
			/* ret is -EINVAL here, same result as the other errors */
			goto out;

		options = atoi(buf);

		for (i = 0; i < options; i++) {
			/* next is the size of the options */
			n = read_string(fd, buf, BUFSIZ);
			if (n == BUFSIZ)
				/** ERROR **/
				goto out;
			size = atoi(buf);
			/*
			 * prevent a client from killing us: the size comes
			 * from the network, so reject negative and zero
			 * sizes as well as oversized ones.
			 */
			if (size <= 0 || size > MAX_OPTION_SIZE)
				goto out;

			ret = -ENOMEM;
			/* One extra byte so the option is always terminated */
			option = malloc(size + 1);
			if (!option)
				goto out;

			ret = -EIO;
			t = size;
			s = 0;
			do {
				s = read(fd, option+s, t);
				if (s <= 0) {
					free(option);
					goto out;
				}
				t -= s;
				s = size - t;
			} while (t);

			/* process_option() compares with strcmp() */
			option[size] = '\0';

			s = process_option(msg_handle, option);
			free(option);
			/* do we understand this option?
*/ ret = -EINVAL; if (!s) goto out; } } if (msg_handle->flags & TRACECMD_MSG_FL_USE_TCP) tracecmd_plog("Using TCP for live connection\n"); ret = pagesize; out: free(last_proto); return ret; } static int create_client_file(const char *node, const char *port) { char buf[BUFSIZ]; int ofd; snprintf(buf, BUFSIZ, "%s.%s:%s.dat", output_file, node, port); ofd = open(buf, O_RDWR | O_CREAT | O_TRUNC, 0644); if (ofd < 0) pdie("Can not create file %s", buf); return ofd; } static void destroy_all_readers(int cpus, int *pid_array, const char *node, const char *port) { int cpu; for (cpu = 0; cpu < cpus; cpu++) { if (pid_array[cpu] > 0) { kill(pid_array[cpu], SIGKILL); waitpid(pid_array[cpu], NULL, 0); delete_temp_file(node, port, cpu); pid_array[cpu] = 0; } } free(pid_array); } static int *create_all_readers(const char *node, const char *port, int pagesize, struct tracecmd_msg_handle *msg_handle) { enum port_type port_type = USE_UDP; char buf[BUFSIZ]; unsigned int *port_array; int *pid_array; unsigned int start_port; unsigned int connect_port; int cpus = msg_handle->cpu_count; int cpu; int pid; if (!pagesize) return NULL; if (msg_handle->flags & TRACECMD_MSG_FL_USE_TCP) port_type = USE_TCP; else if (msg_handle->flags & TRACECMD_MSG_FL_USE_VSOCK) port_type = USE_VSOCK; port_array = malloc(sizeof(*port_array) * cpus); if (!port_array) return NULL; pid_array = malloc(sizeof(*pid_array) * cpus); if (!pid_array) { free(port_array); return NULL; } memset(pid_array, 0, sizeof(int) * cpus); start_port = START_PORT_SEARCH; /* Now create a port for each CPU */ for (cpu = 0; cpu < cpus; cpu++) { connect_port = open_port(node, port, &pid, cpu, pagesize, start_port, port_type); if (connect_port < 0) goto out_free; port_array[cpu] = connect_port; pid_array[cpu] = pid; /* * Due to some bugging finding ports, * force search after last port */ start_port = connect_port + 1; } if (msg_handle->version == V3_PROTOCOL) { /* send set of port numbers to the client */ if 
(tracecmd_msg_send_port_array(msg_handle, port_array) < 0) { tracecmd_plog("Failed sending port array\n"); goto out_free; } } else { /* send the client a comma deliminated set of port numbers */ for (cpu = 0; cpu < cpus; cpu++) { snprintf(buf, BUFSIZ, "%s%d", cpu ? "," : "", port_array[cpu]); write(msg_handle->fd, buf, strlen(buf)); } /* end with null terminator */ write(msg_handle->fd, "\0", 1); } free(port_array); return pid_array; out_free: free(port_array); destroy_all_readers(cpus, pid_array, node, port); return NULL; } static int collect_metadata_from_client(struct tracecmd_msg_handle *msg_handle, int ofd) { char buf[BUFSIZ]; int n, s, t; int ifd = msg_handle->fd; int ret = 0; do { n = read(ifd, buf, BUFSIZ); if (n < 0) { if (errno == EINTR) continue; ret = -errno; break; } t = n; s = 0; do { s = write(ofd, buf+s, t); if (s < 0) { if (errno == EINTR) break; ret = -errno; goto out; } t -= s; s = n - t; } while (t); } while (n > 0 && !tracecmd_msg_done(msg_handle)); out: return ret; } static void stop_all_readers(int cpus, int *pid_array) { int cpu; for (cpu = 0; cpu < cpus; cpu++) { if (pid_array[cpu] > 0) kill(pid_array[cpu], SIGUSR1); } } static int put_together_file(int cpus, int ofd, const char *node, const char *port, bool write_options) { struct tracecmd_output *handle = NULL; char **temp_files; int cpu; int ret = -ENOMEM; /* Now put together the file */ temp_files = malloc(sizeof(*temp_files) * cpus); if (!temp_files) return -ENOMEM; for (cpu = 0; cpu < cpus; cpu++) { temp_files[cpu] = get_temp_file(node, port, cpu); if (!temp_files[cpu]) goto out; } handle = tracecmd_get_output_handle_fd(ofd); if (!handle) { ret = -1; goto out; } if (write_options) { ret = tracecmd_write_cpus(handle, cpus); if (ret) goto out; ret = tracecmd_write_buffer_info(handle); if (ret) goto out; ret = tracecmd_write_options(handle); if (ret) goto out; } ret = tracecmd_write_cpu_data(handle, cpus, temp_files, NULL); out: tracecmd_output_close(handle); for (cpu--; cpu >= 0; cpu--) 
/*
 * trace_net_cmp_connection - check if a socket address belongs to a host name
 * @addr: the address to check
 * @name: host name (or address string) to compare against
 *
 * Resolves @addr to a host string with getnameinfo() and compares it with
 * @name.  If they differ, every address that @name resolves to is converted
 * back to a host string and compared as well.
 *
 * Returns true if a match is found, false otherwise (including when either
 * resolution fails).
 */
bool trace_net_cmp_connection(struct sockaddr_storage *addr, const char *name)
{
	char host[NI_MAXHOST], nhost[NI_MAXHOST];
	char service[NI_MAXSERV];
	socklen_t addr_len = sizeof(*addr);
	struct addrinfo *result, *rp;
	struct addrinfo hints;
	bool found = false;
	int s;

	/* A failed lookup must not be reported as a match */
	if (getnameinfo((struct sockaddr *)addr, addr_len,
			host, NI_MAXHOST,
			service, NI_MAXSERV, NI_NUMERICSERV))
		return false;

	if (strcmp(host, name) == 0)
		return true;

	/* Check other IPs that name could be for */

	memset(&hints, 0, sizeof(hints));
	hints.ai_family = AF_UNSPEC;
	hints.ai_socktype = SOCK_STREAM;

	s = getaddrinfo(name, NULL, &hints, &result);
	if (s != 0)
		return false;

	for (rp = result; rp != NULL; rp = rp->ai_next) {
		if (getnameinfo(rp->ai_addr, rp->ai_addrlen,
				nhost, NI_MAXHOST,
				service, NI_MAXSERV, NI_NUMERICSERV))
			continue;
		if (strcmp(host, nhost) == 0) {
			found = true;
			break;
		}
	}

	freeaddrinfo(result);
	return found;
}
tracecmd_msg_handle_close(msg_handle); if (!tracecmd_get_debug()) exit(0); return 0; } static int *client_pids; static int free_pids; static int saved_pids; static void add_process(int pid) { int *client = NULL; int i; if (free_pids) { for (i = 0; i < saved_pids; i++) { if (!client_pids[i]) { client = &client_pids[i]; break; } } free_pids--; if (!client) warning("Could not find free pid"); } if (!client) { client_pids = realloc(client_pids, sizeof(*client_pids) * (saved_pids + 1)); if (!client_pids) pdie("allocating pids"); client = &client_pids[saved_pids++]; } *client = pid; } static void remove_process(int pid) { int i; for (i = 0; i < saved_pids; i++) { if (client_pids[i] == pid) break; } if (i == saved_pids) return; client_pids[i] = 0; free_pids++; } static void kill_clients(void) { int status; int i; for (i = 0; i < saved_pids; i++) { if (!client_pids[i]) continue; /* Only kill the clients if we received SIGINT or SIGTERM */ if (done) kill(client_pids[i], SIGINT); waitpid(client_pids[i], &status, 0); } saved_pids = 0; } static void clean_up(void) { int status; int ret; /* Clean up any children that has started before */ do { ret = waitpid(0, &status, WNOHANG); if (ret > 0) remove_process(ret); } while (ret > 0); } static void do_accept_loop(int sfd) { struct sockaddr_storage peer_addr; #ifdef VSOCK struct sockaddr_vm vm_addr; #endif struct sockaddr *addr; socklen_t addr_len; int cfd, pid; if (use_vsock) { #ifdef VSOCK addr = (struct sockaddr *)&vm_addr; addr_len = sizeof(vm_addr); #endif } else { addr = (struct sockaddr *)&peer_addr; addr_len = sizeof(peer_addr); } do { cfd = accept(sfd, addr, &addr_len); if (cfd < 0 && errno == EINTR) { clean_up(); continue; } if (cfd < 0) pdie("connecting"); pid = do_connection(cfd, addr, addr_len); if (pid > 0) add_process(pid); } while (!done); /* Get any final stragglers */ clean_up(); } void make_pid_file(const char *pidfile_basename) { char buf[PATH_MAX]; int fd; make_pid_name(buf, pidfile_basename); fd = open(buf, 
/*
 * get_vsock - create the vsocket to listen on
 * @port: the port number, as a string
 *
 * Returns the descriptor from trace_vsock_make(), which is negative on
 * failure.  On success the local CID is printed if it can be retrieved.
 */
static int get_vsock(const char *port)
{
	int cid;
	int sd;

	sd = trace_vsock_make(atoi(port));
	if (sd < 0)
		return sd;

	/*
	 * Keep the CID signed so that the error check below is meaningful.
	 * NOTE(review): assumes trace_vsock_local_cid() returns a negative
	 * int on failure - confirm against its prototype.
	 */
	cid = trace_vsock_local_cid();
	if (cid >= 0)
		printf("listening on @%u:%s\n", (unsigned int)cid, port);

	return sd;
}
= getopt_long (argc-1, argv+1, "+hp:Vo:d:l:D", long_options, &option_index); if (c == -1) break; switch (c) { case 'h': usage(argv); break; case 'p': port = optarg; break; case 'd': output_dir = optarg; break; case 'V': use_vsock = true; break; case 'o': output_file = optarg; break; case 'l': logfile = optarg; break; case 'D': daemon = 1; break; case OPT_debug: tracecmd_set_debug(true); break; case OPT_verbose: if (trace_set_verbose(optarg) < 0) die("invalid verbose level %s", optarg); break; default: usage(argv); } } if (!port) usage(argv); if ((argc - optind) >= 2) usage(argv); if (!output_file) output_file = default_output_file; if (!output_dir) output_dir = default_output_dir; if (logfile) { /* set the writes to a logfile instead */ if (tracecmd_set_logfile(logfile) < 0) die("creating log file %s", logfile); } if (chdir(output_dir) < 0) die("Can't access directory %s", output_dir); if (daemon) start_daemon(); signal_setup(SIGINT, finish); signal_setup(SIGTERM, finish); do_listen(port); return; } trace-cmd-v3.3.1/tracecmd/trace-mem.c000066400000000000000000000326241470231550600173550ustar00rootroot00000000000000// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2013 Red Hat Inc, Steven Rostedt * * * This code was inspired by Ezequiel Garcia's trace_analyze program: * git://github.com/ezequielgarcia/trace_analyze.git * * Unfortuntately, I hate working with Python, and I also had trouble * getting it to work, as I had an old python on my Fedora 13, and it * was written for the newer version. I decided to do some of it here * in C. 
*/ #include #include #include #include #include #include #include "trace-local.h" #include "trace-hash-local.h" #include "list.h" static int kmalloc_type; static int kmalloc_node_type; static int kfree_type; static int kmem_cache_alloc_type; static int kmem_cache_alloc_node_type; static int kmem_cache_free_type; static struct tep_format_field *common_type_mem; static struct tep_format_field *kmalloc_callsite_field; static struct tep_format_field *kmalloc_bytes_req_field; static struct tep_format_field *kmalloc_bytes_alloc_field; static struct tep_format_field *kmalloc_ptr_field; static struct tep_format_field *kmalloc_node_callsite_field; static struct tep_format_field *kmalloc_node_bytes_req_field; static struct tep_format_field *kmalloc_node_bytes_alloc_field; static struct tep_format_field *kmalloc_node_ptr_field; static struct tep_format_field *kfree_ptr_field; static struct tep_format_field *kmem_cache_callsite_field; static struct tep_format_field *kmem_cache_bytes_req_field; static struct tep_format_field *kmem_cache_bytes_alloc_field; static struct tep_format_field *kmem_cache_ptr_field; static struct tep_format_field *kmem_cache_node_callsite_field; static struct tep_format_field *kmem_cache_node_bytes_req_field; static struct tep_format_field *kmem_cache_node_bytes_alloc_field; static struct tep_format_field *kmem_cache_node_ptr_field; static struct tep_format_field *kmem_cache_free_ptr_field; static void *zalloc(size_t size) { return calloc(1, size); } static struct tep_event * update_event(struct tep_handle *pevent, const char *sys, const char *name, int *id) { struct tep_event *event; event = tep_find_event_by_name(pevent, sys, name); if (!event) return NULL; *id = event->id; return event; } static void update_kmalloc(struct tep_handle *pevent) { struct tep_event *event; event = update_event(pevent, "kmem", "kmalloc", &kmalloc_type); if (!event) return; kmalloc_callsite_field = tep_find_field(event, "call_site"); kmalloc_bytes_req_field = 
tep_find_field(event, "bytes_req"); kmalloc_bytes_alloc_field = tep_find_field(event, "bytes_alloc"); kmalloc_ptr_field = tep_find_field(event, "ptr"); } static void update_kmalloc_node(struct tep_handle *pevent) { struct tep_event *event; event = update_event(pevent, "kmem", "kmalloc_node", &kmalloc_node_type); if (!event) return; kmalloc_node_callsite_field = tep_find_field(event, "call_site"); kmalloc_node_bytes_req_field = tep_find_field(event, "bytes_req"); kmalloc_node_bytes_alloc_field = tep_find_field(event, "bytes_alloc"); kmalloc_node_ptr_field = tep_find_field(event, "ptr"); } static void update_kfree(struct tep_handle *pevent) { struct tep_event *event; event = update_event(pevent, "kmem", "kfree", &kfree_type); if (!event) return; kfree_ptr_field = tep_find_field(event, "ptr"); } static void update_kmem_cache_alloc(struct tep_handle *pevent) { struct tep_event *event; event = update_event(pevent, "kmem", "kmem_cache_alloc", &kmem_cache_alloc_type); if (!event) return; kmem_cache_callsite_field = tep_find_field(event, "call_site"); kmem_cache_bytes_req_field = tep_find_field(event, "bytes_req"); kmem_cache_bytes_alloc_field = tep_find_field(event, "bytes_alloc"); kmem_cache_ptr_field = tep_find_field(event, "ptr"); } static void update_kmem_cache_alloc_node(struct tep_handle *pevent) { struct tep_event *event; event = update_event(pevent, "kmem", "kmem_cache_alloc_node", &kmem_cache_alloc_node_type); if (!event) return; kmem_cache_node_callsite_field = tep_find_field(event, "call_site"); kmem_cache_node_bytes_req_field = tep_find_field(event, "bytes_req"); kmem_cache_node_bytes_alloc_field = tep_find_field(event, "bytes_alloc"); kmem_cache_node_ptr_field = tep_find_field(event, "ptr"); } static void update_kmem_cache_free(struct tep_handle *pevent) { struct tep_event *event; event = update_event(pevent, "kmem", "kmem_cache_free", &kmem_cache_free_type); if (!event) return; kmem_cache_free_ptr_field = tep_find_field(event, "ptr"); } struct func_descr { 
/*
 * Size of the function and pointer hash tables.  HASH_MASK reduces a hash
 * value to a table index; it must not carry a trailing semicolon so that
 * it can be used inside any expression.
 */
#define HASH_BITS	12
#define HASH_SIZE	(1 << HASH_BITS)
#define HASH_MASK	(HASH_SIZE - 1)
*/ if (funcd->func == func) return funcd; } return NULL; } static struct func_descr *create_func(const char *func) { struct func_descr *funcd; int key = make_key(func, strlen(func)) & HASH_MASK; funcd = zalloc(sizeof(*funcd)); if (!funcd) die("malloc"); funcd->func = func; funcd->next = func_hash[key]; func_hash[key] = funcd; func_count++; return funcd; } static struct ptr_descr *find_ptr(unsigned long long ptr) { struct ptr_descr *ptrd; int key = make_key(&ptr, sizeof(ptr)) & HASH_MASK; for (ptrd = ptr_hash[key]; ptrd; ptrd = ptrd->next) { if (ptrd->ptr == ptr) return ptrd; } return NULL; } static struct ptr_descr *create_ptr(unsigned long long ptr) { struct ptr_descr *ptrd; int key = make_key(&ptr, sizeof(ptr)) & HASH_MASK; ptrd = zalloc(sizeof(*ptrd)); if (!ptrd) die("malloc"); ptrd->ptr = ptr; ptrd->next = ptr_hash[key]; ptr_hash[key] = ptrd; return ptrd; } static void remove_ptr(unsigned long long ptr) { struct ptr_descr *ptrd, **last; int key = make_key(&ptr, sizeof(ptr)) & HASH_MASK; last = &ptr_hash[key]; for (ptrd = ptr_hash[key]; ptrd; ptrd = ptrd->next) { if (ptrd->ptr == ptr) break; last = &ptrd->next; } if (!ptrd) return; *last = ptrd->next; free(ptrd); } static void add_kmalloc(const char *func, unsigned long long ptr, unsigned int req, int alloc) { struct func_descr *funcd; struct ptr_descr *ptrd; funcd = find_func(func); if (!funcd) funcd = create_func(func); funcd->total_alloc += alloc; funcd->total_req += req; funcd->current_alloc += alloc; funcd->current_req += req; if (funcd->current_alloc > funcd->max_alloc) funcd->max_alloc = funcd->current_alloc; if (funcd->current_req > funcd->max_req) funcd->max_req = funcd->current_req; ptrd = find_ptr(ptr); if (!ptrd) ptrd = create_ptr(ptr); ptrd->alloc = alloc; ptrd->req = req; ptrd->func = funcd; } static void remove_kmalloc(unsigned long long ptr) { struct func_descr *funcd; struct ptr_descr *ptrd; ptrd = find_ptr(ptr); if (!ptrd) return; funcd = ptrd->func; funcd->current_alloc -= ptrd->alloc; 
funcd->current_req -= ptrd->req; remove_ptr(ptr); } static void process_kmalloc(struct tep_handle *pevent, struct tep_record *record, struct tep_format_field *callsite_field, struct tep_format_field *bytes_req_field, struct tep_format_field *bytes_alloc_field, struct tep_format_field *ptr_field) { unsigned long long callsite; unsigned long long val; unsigned long long ptr; unsigned int req; int alloc; const char *func; tep_read_number_field(callsite_field, record->data, &callsite); tep_read_number_field(bytes_req_field, record->data, &val); req = val; tep_read_number_field(bytes_alloc_field, record->data, &val); alloc = val; tep_read_number_field(ptr_field, record->data, &ptr); func = tep_find_function(pevent, callsite); add_kmalloc(func, ptr, req, alloc); } static void process_kfree(struct tep_handle *pevent, struct tep_record *record, struct tep_format_field *ptr_field) { unsigned long long ptr; tep_read_number_field(ptr_field, record->data, &ptr); remove_kmalloc(ptr); } static void process_record(struct tep_handle *pevent, struct tep_record *record) { unsigned long long val; int type; tep_read_number_field(common_type_mem, record->data, &val); type = val; if (type == kmalloc_type) return process_kmalloc(pevent, record, kmalloc_callsite_field, kmalloc_bytes_req_field, kmalloc_bytes_alloc_field, kmalloc_ptr_field); if (type == kmalloc_node_type) return process_kmalloc(pevent, record, kmalloc_node_callsite_field, kmalloc_node_bytes_req_field, kmalloc_node_bytes_alloc_field, kmalloc_node_ptr_field); if (type == kfree_type) return process_kfree(pevent, record, kfree_ptr_field); if (type == kmem_cache_alloc_type) return process_kmalloc(pevent, record, kmem_cache_callsite_field, kmem_cache_bytes_req_field, kmem_cache_bytes_alloc_field, kmem_cache_ptr_field); if (type == kmem_cache_alloc_node_type) return process_kmalloc(pevent, record, kmem_cache_node_callsite_field, kmem_cache_node_bytes_req_field, kmem_cache_node_bytes_alloc_field, kmem_cache_node_ptr_field); if 
(type == kmem_cache_free_type) return process_kfree(pevent, record, kmem_cache_free_ptr_field); } static int func_cmp(const void *a, const void *b) { const struct func_descr *fa = *(const struct func_descr **)a; const struct func_descr *fb = *(const struct func_descr **)b; if (fa->waste > fb->waste) return -1; if (fa->waste < fb->waste) return 1; return 0; } static void sort_list(void) { struct func_descr *funcd; int h; int i = 0; func_list = zalloc(sizeof(*func_list) * func_count); for (h = 0; h < HASH_SIZE; h++) { for (funcd = func_hash[h]; funcd; funcd = funcd->next) { funcd->waste = funcd->current_alloc - funcd->current_req; funcd->max_waste = funcd->max_alloc - funcd->max_req; if (i == func_count) die("more funcs than expected\n"); func_list[i++] = funcd; } } qsort(func_list, func_count, sizeof(*func_list), func_cmp); } static void print_list(void) { struct func_descr *funcd; int i; printf(" Function \t"); printf("Waste\tAlloc\treq\t\tTotAlloc TotReq\t\tMaxAlloc MaxReq\t"); printf("MaxWaste\n"); printf(" -------- \t"); printf("-----\t-----\t---\t\t-------- ------\t\t-------- ------\t"); printf("--------\n"); for (i = 0; i < func_count; i++) { funcd = func_list[i]; printf("%32s\t%ld\t%ld\t%ld\t\t%8ld %8ld\t\t%8ld %8ld\t%ld\n", funcd->func, funcd->waste, funcd->current_alloc, funcd->current_req, funcd->total_alloc, funcd->total_req, funcd->max_alloc, funcd->max_req, funcd->max_waste); } } static void do_trace_mem(struct tracecmd_input *handle) { struct tep_handle *pevent = tracecmd_get_tep(handle); struct tep_record *record; struct tep_event *event; int missed_events = 0; int cpus; int cpu; int ret; ret = tracecmd_init_data(handle); if (ret < 0) die("failed to init data"); if (ret > 0) die("trace-cmd mem does not work with latency traces\n"); cpus = tracecmd_cpus(handle); /* Need to get any event */ for (cpu = 0; cpu < cpus; cpu++) { record = tracecmd_peek_data(handle, cpu); if (record) break; } if (!record) die("No records found in file"); ret = 
tep_data_type(pevent, record); event = tep_find_event(pevent, ret); common_type_mem = tep_find_common_field(event, "common_type"); if (!common_type_mem) die("Can't find a 'type' field?"); update_kmalloc(pevent); update_kmalloc_node(pevent); update_kfree(pevent); update_kmem_cache_alloc(pevent); update_kmem_cache_alloc_node(pevent); update_kmem_cache_free(pevent); while ((record = tracecmd_read_next_data(handle, &cpu))) { /* record missed event */ if (!missed_events && record->missed_events) missed_events = 1; process_record(pevent, record); tracecmd_free_record(record); } sort_list(); print_list(); } void trace_mem(int argc, char **argv) { struct tracecmd_input *handle; const char *input_file = NULL; int ret; for (;;) { int c; c = getopt(argc-1, argv+1, "+hi:"); if (c == -1) break; switch (c) { case 'h': usage(argv); break; case 'i': if (input_file) die("Only one input for mem"); input_file = optarg; break; default: usage(argv); } } if ((argc - optind) >= 2) { if (input_file) usage(argv); input_file = argv[optind + 1]; } if (!input_file) input_file = DEFAULT_INPUT_FILE; handle = tracecmd_alloc(input_file, 0); if (!handle) die("can't open %s\n", input_file); ret = tracecmd_read_headers(handle, 0); if (ret) return; do_trace_mem(handle); tracecmd_close(handle); } trace-cmd-v3.3.1/tracecmd/trace-profile.c000066400000000000000000001640301470231550600202340ustar00rootroot00000000000000// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2014 Red Hat Inc, Steven Rostedt * */ /** FIXME: Convert numbers based on machine and file */ #include #include #include #ifndef NO_AUDIT #include #endif #include "trace-local.h" #include "trace-hash.h" #include "trace-hash-local.h" #include "list.h" #include #ifdef WARN_NO_AUDIT # warning "lib audit not found, using raw syscalls " \ "(install audit-libs-devel(for fedora) or libaudit-dev(for debian/ubuntu) and try again)" #endif #define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWP" #define TASK_STATE_MAX 1024 #define task_from_item(item) 
container_of(item, struct task_data, hash) #define start_from_item(item) container_of(item, struct start_data, hash) #define event_from_item(item) container_of(item, struct event_hash, hash) #define stack_from_item(item) container_of(item, struct stack_data, hash) #define group_from_item(item) container_of(item, struct group_data, hash) #define event_data_from_item(item) container_of(item, struct event_data, hash) static unsigned long long nsecs_per_sec(unsigned long long ts) { return ts / NSEC_PER_SEC; } static unsigned long long mod_to_usec(unsigned long long ts) { return ((ts % NSEC_PER_SEC) + NSEC_PER_USEC / 2) / NSEC_PER_USEC; } struct handle_data; struct event_hash; struct event_data; typedef void (*event_data_print)(struct trace_seq *s, struct event_hash *hash); typedef int (*handle_event_func)(struct handle_data *h, unsigned long long pid, struct event_data *data, struct tep_record *record, int cpu); enum event_data_type { EVENT_TYPE_UNDEFINED, EVENT_TYPE_STACK, EVENT_TYPE_SCHED_SWITCH, EVENT_TYPE_WAKEUP, EVENT_TYPE_FUNC, EVENT_TYPE_SYSCALL, EVENT_TYPE_IRQ, EVENT_TYPE_SOFTIRQ, EVENT_TYPE_SOFTIRQ_RAISE, EVENT_TYPE_PROCESS_EXEC, EVENT_TYPE_USER_MATE, }; struct event_data { struct trace_hash_item hash; int id; int trace; struct tep_event *event; struct event_data *end; struct event_data *start; struct tep_format_field *pid_field; struct tep_format_field *start_match_field; /* match with start */ struct tep_format_field *end_match_field; /* match with end */ struct tep_format_field *data_field; /* optional */ event_data_print print_func; handle_event_func handle_event; void *private; int migrate; /* start/end pairs can migrate cpus */ int global; /* use global tasks */ enum event_data_type type; }; struct stack_data { struct trace_hash_item hash; unsigned long long count; unsigned long long time; unsigned long long time_min; unsigned long long ts_min; unsigned long long time_max; unsigned long long ts_max; unsigned long long time_avg; unsigned long size; char 
/*
 * A start event that is waiting for its matching end event.
 * Allocated by add_start() and released by free_start().
 */
struct start_data {
	/* Item in the task's start_hash; the key is trace_hash(search_val) */
	struct trace_hash_item	hash;
	/* Descriptor of the event that created this start */
	struct event_data	*event_data;
	/*
	 * Linked on the handle's migrate_starts list when the event can
	 * migrate, otherwise on the cpu_starts list of the record's CPU.
	 */
	struct list_head	list;
	/* The task this start was added to */
	struct task_data	*task;
	/* Timestamp of the record that created this start */
	unsigned long long 	timestamp;
	/* Value compared by match_start() to find this start again */
	unsigned long long 	search_val;
	/* Value copied into the event hash lookup when the start is closed */
	unsigned long long	val;
	/* CPU of the record that created this start */
	int			cpu;

	/*
	 * Stack trace attached to this start, if any.  A non-NULL
	 * stack.record is freed when the start is released.
	 */
	struct stack_holder	stack;
};
static bool merge_like_comms = false; void trace_profile_set_merge_like_comms(void) { merge_like_comms = true; } static struct start_data * add_start(struct task_data *task, struct event_data *event_data, struct tep_record *record, unsigned long long search_val, unsigned long long val) { struct start_data *start; start = malloc(sizeof(*start)); if (!start) return NULL; memset(start, 0, sizeof(*start)); start->hash.key = trace_hash(search_val); start->search_val = search_val; start->val = val; start->timestamp = record->ts; start->event_data = event_data; start->cpu = record->cpu; start->task = task; trace_hash_add(&task->start_hash, &start->hash); if (event_data->migrate) list_add(&start->list, &task->handle->migrate_starts); else list_add(&start->list, &task->handle->cpu_starts[record->cpu]); return start; } struct event_data_match { struct event_data *event_data; unsigned long long search_val; unsigned long long val; }; static int match_start(struct trace_hash_item *item, void *data) { struct start_data *start = start_from_item(item); struct event_data_match *edata = data; return start->event_data == edata->event_data && start->search_val == edata->search_val; } static int match_event(struct trace_hash_item *item, void *data) { struct event_data_match *edata = data; struct event_hash *event = event_from_item(item); return event->event_data == edata->event_data && event->search_val == edata->search_val && event->val == edata->val; } static struct event_hash * find_event_hash(struct task_data *task, struct event_data_match *edata) { struct event_hash *event_hash; struct trace_hash_item *item; unsigned long long key; key = (unsigned long)edata->event_data + (unsigned long)edata->search_val + (unsigned long)edata->val; key = trace_hash(key); item = trace_hash_find(&task->event_hash, key, match_event, edata); if (item) return event_from_item(item); event_hash = malloc(sizeof(*event_hash)); if (!event_hash) return NULL; memset(event_hash, 0, sizeof(*event_hash)); 
event_hash->event_data = edata->event_data; event_hash->search_val = edata->search_val; event_hash->val = edata->val; event_hash->hash.key = key; trace_hash_init(&event_hash->stacks, 32); trace_hash_add(&task->event_hash, &event_hash->hash); return event_hash; } static struct event_hash * find_start_event_hash(struct task_data *task, struct event_data *event_data, struct start_data *start) { struct event_data_match edata; edata.event_data = event_data; edata.search_val = start->search_val; edata.val = start->val; return find_event_hash(task, &edata); } static struct start_data * find_start(struct task_data *task, struct event_data *event_data, unsigned long long search_val) { unsigned long long key = trace_hash(search_val); struct event_data_match edata; void *data = &edata; struct trace_hash_item *item; struct start_data *start; edata.event_data = event_data; edata.search_val = search_val; item = trace_hash_find(&task->start_hash, key, match_start, data); if (!item) return NULL; start = start_from_item(item); return start; } struct stack_match { void *caller; unsigned long size; }; static int match_stack(struct trace_hash_item *item, void *data) { struct stack_data *stack = stack_from_item(item); struct stack_match *match = data; if (match->size != stack->size) return 0; return memcmp(stack->caller, match->caller, stack->size) == 0; } static void add_event_stack(struct event_hash *event_hash, void *caller, unsigned long size, unsigned long long time, unsigned long long ts) { unsigned long long key; struct stack_data *stack; struct stack_match match; struct trace_hash_item *item; int i; match.caller = caller; match.size = size; if (size < sizeof(int)) die("Stack size of less than sizeof(int)??"); for (key = 0, i = 0; i <= size - sizeof(int); i += sizeof(int)) key += trace_hash(*(int *)(caller + i)); item = trace_hash_find(&event_hash->stacks, key, match_stack, &match); if (!item) { stack = malloc(sizeof(*stack) + size); if (!stack) { warning("Could not allocate 
stack"); return; } memset(stack, 0, sizeof(*stack)); memcpy(&stack->caller, caller, size); stack->size = size; stack->hash.key = key; trace_hash_add(&event_hash->stacks, &stack->hash); } else stack = stack_from_item(item); stack->count++; stack->time += time; if (stack->count == 1 || time < stack->time_min) { stack->time_min = time; stack->ts_min = ts; } if (time > stack->time_max) { stack->time_max = time; stack->ts_max = ts; } } static void free_start(struct start_data *start) { if (start->task->last_start == start) start->task->last_start = NULL; if (start->stack.record) tracecmd_free_record(start->stack.record); trace_hash_del(&start->hash); list_del(&start->list); free(start); } static struct event_hash * add_and_free_start(struct task_data *task, struct start_data *start, struct event_data *event_data, unsigned long long ts) { struct event_hash *event_hash; long long delta; delta = ts - start->timestamp; /* * It's possible on a live trace, because of timestamps being * different on different CPUs, we can go back in time. When * that happens, just zero out the delta. 
*/ if (delta < 0) delta = 0; event_hash = find_start_event_hash(task, event_data, start); if (!event_hash) return NULL; event_hash->count++; event_hash->time_total += delta; event_hash->last_time = delta; if (delta > event_hash->time_max) { event_hash->time_max = delta; event_hash->ts_max = ts; } if (event_hash->count == 1 || delta < event_hash->time_min) { event_hash->time_min = delta; event_hash->ts_min = ts; } if (start->stack.record) { unsigned long size; void *caller; size = start->stack.size; caller = start->stack.caller; add_event_stack(event_hash, caller, size, delta, start->stack.record->ts); tracecmd_free_record(start->stack.record); start->stack.record = NULL; } free_start(start); return event_hash; } static struct event_hash * find_and_update_start(struct task_data *task, struct event_data *event_data, unsigned long long ts, unsigned long long search_val) { struct start_data *start; start = find_start(task, event_data, search_val); if (!start) return NULL; return add_and_free_start(task, start, event_data, ts); } static int match_task(struct trace_hash_item *item, void *data) { struct task_data *task = task_from_item(item); int pid = *(unsigned long *)data; return task->pid == pid; } static void init_task(struct handle_data *h, struct task_data *task) { task->handle = h; trace_hash_init(&task->start_hash, 16); trace_hash_init(&task->event_hash, 32); } static struct task_data * add_task(struct handle_data *h, int pid) { unsigned long long key = trace_hash(pid); struct task_data *task; task = malloc(sizeof(*task)); if (!task) { warning("Could not allocate task"); return NULL; } memset(task, 0, sizeof(*task)); task->pid = pid; task->hash.key = key; trace_hash_add(&h->task_hash, &task->hash); init_task(h, task); return task; } static struct task_data * find_task(struct handle_data *h, int pid) { unsigned long long key = trace_hash(pid); struct trace_hash_item *item; static struct task_data *last_task; void *data = (unsigned long *)&pid; if (last_task && 
last_task->pid == pid) return last_task; item = trace_hash_find(&h->task_hash, key, match_task, data); if (item) last_task = task_from_item(item); else last_task = add_task(h, pid); return last_task; } static int match_group(struct trace_hash_item *item, void *data) { struct group_data *group = group_from_item(item); return strcmp(group->comm, (char *)data) == 0; } static void add_task_comm(struct task_data *task, struct tep_format_field *field, struct tep_record *record) { const char *comm; task->comm = malloc(field->size + 1); if (!task->comm) { warning("Could not allocate task comm"); return; } comm = record->data + field->offset; memcpy(task->comm, comm, field->size); task->comm[field->size] = 0; } /* Account for tasks that don't have starts */ static void account_task(struct task_data *task, struct event_data *event_data, struct tep_record *record) { struct event_data_match edata; struct event_hash *event_hash; struct task_data *proxy = NULL; unsigned long long search_val = 0; unsigned long long val = 0; unsigned long long pid; /* * If an event has the pid_field set, then find that task for * this event instead. Let this task proxy for it to handle * stack traces on this event. */ if (event_data->pid_field) { tep_read_number_field(event_data->pid_field, record->data, &pid); proxy = task; task = find_task(task->handle, pid); if (!task) return; proxy->proxy = task; } /* * If data_field is defined, use that for val, * if the start_field is defined, use that for search_val. 
*/ if (event_data->data_field) { tep_read_number_field(event_data->data_field, record->data, &val); } if (event_data->start_match_field) { tep_read_number_field(event_data->start_match_field, record->data, &search_val); } edata.event_data = event_data; edata.search_val = val; edata.val = val; event_hash = find_event_hash(task, &edata); if (!event_hash) { warning("failed to allocate event_hash"); return; } event_hash->count++; task->last_event = event_hash; } static struct task_data * find_event_task(struct handle_data *h, struct event_data *event_data, struct tep_record *record, unsigned long long pid) { if (event_data->global) { if (event_data->migrate) return h->global_task; else return &h->global_percpu_tasks[record->cpu]; } /* If pid_field is defined, use that to find the task */ if (event_data->pid_field) tep_read_number_field(event_data->pid_field, record->data, &pid); return find_task(h, pid); } static struct task_data * handle_end_event(struct handle_data *h, struct event_data *event_data, struct tep_record *record, int pid) { struct event_hash *event_hash; struct task_data *task; unsigned long long val; task = find_event_task(h, event_data, record, pid); if (!task) return NULL; tep_read_number_field(event_data->start_match_field, record->data, &val); event_hash = find_and_update_start(task, event_data->start, record->ts, val); task->last_start = NULL; task->last_event = event_hash; return task; } static struct task_data * handle_start_event(struct handle_data *h, struct event_data *event_data, struct tep_record *record, unsigned long long pid) { struct start_data *start; struct task_data *task; unsigned long long val; task = find_event_task(h, event_data, record, pid); if (!task) return NULL; tep_read_number_field(event_data->end_match_field, record->data, &val); start = add_start(task, event_data, record, val, val); if (!start) { warning("Failed to allocate start of task"); return NULL; } task->last_start = start; task->last_event = NULL; return task; } 
static int handle_event_data(struct handle_data *h, unsigned long long pid, struct event_data *event_data, struct tep_record *record, int cpu) { struct task_data *task = NULL; /* If this is the end of a event pair (start is set) */ if (event_data->start) task = handle_end_event(h, event_data, record, pid); /* If this is the start of a event pair (end is set) */ if (event_data->end) { task = handle_start_event(h, event_data, record, pid); /* handle_start_event only returns NULL on error */ if (!task) return -1; } if (!task) { task = find_task(h, pid); if (!task) return -1; task->proxy = NULL; task->last_start = NULL; task->last_event = NULL; account_task(task, event_data, record); } return 0; } static void handle_missed_events(struct handle_data *h, int cpu) { struct start_data *start; struct start_data *n; /* Clear all starts on this CPU */ list_for_each_entry_safe(start, n, &h->cpu_starts[cpu], list) { free_start(start); } /* Now clear all starts whose events can migrate */ list_for_each_entry_safe(start, n, &h->migrate_starts, list) { free_start(start); } } static int match_event_data(struct trace_hash_item *item, void *data) { struct event_data *event_data = event_data_from_item(item); int id = (int)(unsigned long)data; return event_data->id == id; } static struct event_data * find_event_data(struct handle_data *h, int id) { struct trace_hash_item *item; unsigned long long key = trace_hash(id); void *data = (void *)(unsigned long)id; item = trace_hash_find(&h->events, key, match_event_data, data); if (item) return event_data_from_item(item); return NULL; } static void trace_profile_record(struct tracecmd_input *handle, struct tep_record *record) { static struct handle_data *last_handle; struct tep_record *stack_record; struct event_data *event_data; struct task_data *task; struct handle_data *h; struct tep_handle *pevent; unsigned long long pid; int cpu = record->cpu; int id; if (last_handle && last_handle->handle == handle) h = last_handle; else { for (h = 
handles; h; h = h->next) { if (h->handle == handle) break; } if (!h) die("Handle not found?"); last_handle = h; } if (record->missed_events) handle_missed_events(h, cpu); pevent = h->pevent; id = tep_data_type(pevent, record); event_data = find_event_data(h, id); if (!event_data) return; /* Get this current PID */ tep_read_number_field(h->common_pid, record->data, &pid); task = find_task(h, pid); if (!task) return; stack_record = task->last_stack; if (event_data->handle_event) event_data->handle_event(h, pid, event_data, record, cpu); else handle_event_data(h, pid, event_data, record, cpu); /* If the last stack hasn't changed, free it */ if (stack_record && task->last_stack == stack_record) { tracecmd_free_record(stack_record); task->last_stack = NULL; } } static struct event_data * add_event(struct handle_data *h, const char *system, const char *event_name, enum event_data_type type) { struct event_data *event_data; struct tep_event *event; event = tep_find_event_by_name(h->pevent, system, event_name); if (!event) return NULL; if (!h->common_pid) { h->common_pid = tep_find_common_field(event, "common_pid"); if (!h->common_pid) die("No 'common_pid' found in event"); } event_data = malloc(sizeof(*event_data)); if (!event_data) { warning("Could not allocate event_data"); return NULL; } memset(event_data, 0, sizeof(*event_data)); event_data->id = event->id; event_data->event = event; event_data->type = type; event_data->hash.key = trace_hash(event_data->event->id); trace_hash_add(&h->events, &event_data->hash); return event_data; } static void mate_events(struct handle_data *h, struct event_data *start, const char *pid_field, const char *end_match_field, struct event_data *end, const char *start_match_field, int migrate, int global) { start->end = end; end->start = start; if (pid_field) { start->pid_field = tep_find_field(start->event, pid_field); if (!start->pid_field) die("Event: %s does not have field %s", start->event->name, pid_field); } /* Field to match with 
end */ start->end_match_field = tep_find_field(start->event, end_match_field); if (!start->end_match_field) die("Event: %s does not have field %s", start->event->name, end_match_field); /* Field to match with start */ end->start_match_field = tep_find_field(end->event, start_match_field); if (!end->start_match_field) die("Event: %s does not have field %s", end->event->name, start_match_field); start->migrate = migrate; start->global = global; end->migrate = migrate; end->global = global; } /** * tracecmd_mate_events - match events to profile against * @handle: The input handle where the events exist. * @start_event: The event that starts the transaction * @pid_field: Use this over common_pid (may be NULL to use common_pid) * @end_match_field: The field that matches the end events @start_match_field * @end_event: The event that ends the transaction * @start_match_field: The end event field that matches start's @end_match_field * @migrate: Can the transaction switch CPUs? 1 for yes, 0 for no * @global: The events are global and not per task */ void tracecmd_mate_events(struct tracecmd_input *handle, struct tep_event *start_event, const char *pid_field, const char *end_match_field, struct tep_event *end_event, const char *start_match_field, int migrate, int global) { struct handle_data *h; struct event_data *start; struct event_data *end; for (h = handles; h; h = h->next) { if (h->handle == handle) break; } if (!h) die("Handle not found for trace profile"); start = add_event(h, start_event->system, start_event->name, EVENT_TYPE_USER_MATE); end = add_event(h, end_event->system, end_event->name, EVENT_TYPE_USER_MATE); if (!start || !end) return; mate_events(h, start, pid_field, end_match_field, end, start_match_field, migrate, global); } static void func_print(struct trace_seq *s, struct event_hash *event_hash) { const char *func; func = tep_find_function(event_hash->event_data->event->tep, event_hash->val); if (func) trace_seq_printf(s, "func: %s()", func); else 
trace_seq_printf(s, "func: 0x%llx", event_hash->val); } static void syscall_print(struct trace_seq *s, struct event_hash *event_hash) { #ifndef NO_AUDIT const char *name = NULL; int machine; machine = audit_detect_machine(); if (machine < 0) goto fail; name = audit_syscall_to_name(event_hash->val, machine); if (!name) goto fail; trace_seq_printf(s, "syscall:%s", name); return; fail: #endif trace_seq_printf(s, "%s:%d", event_hash->event_data->event->name, (int)event_hash->val); } /* From Linux include/linux/interrupt.h */ #define SOFTIRQS \ C(HI), \ C(TIMER), \ C(NET_TX), \ C(NET_RX), \ C(BLOCK), \ C(BLOCK_IOPOLL), \ C(TASKLET), \ C(SCHED), \ C(HRTIMER), \ C(RCU), \ C(NR), #undef C #define C(a) a##_SOFTIRQ enum { SOFTIRQS }; #undef C #define C(a) #a static const char *softirq_map[] = { SOFTIRQS }; static void softirq_print(struct trace_seq *s, struct event_hash *event_hash) { int softirq = (int)event_hash->val; if (softirq < NR_SOFTIRQ) trace_seq_printf(s, "%s:%s", event_hash->event_data->event->name, softirq_map[softirq]); else trace_seq_printf(s, "%s:%d", event_hash->event_data->event->name, softirq); } static void sched_switch_print(struct trace_seq *s, struct event_hash *event_hash) { const char states[] = TASK_STATE_TO_CHAR_STR; int i; trace_seq_printf(s, "%s:", event_hash->event_data->event->name); if (event_hash->val) { int val = event_hash->val; for (i = 0; val && i < sizeof(states) - 1; i++, val >>= 1) { if (val & 1) trace_seq_putc(s, states[i+1]); } } else trace_seq_putc(s, 'R'); } static int handle_sched_switch_event(struct handle_data *h, unsigned long long pid, struct event_data *event_data, struct tep_record *record, int cpu) { struct task_data *task; unsigned long long prev_pid; unsigned long long prev_state; unsigned long long next_pid; struct start_data *start; /* pid_field holds prev_pid, data_field holds prev_state */ tep_read_number_field(event_data->pid_field, record->data, &prev_pid); tep_read_number_field(event_data->data_field, record->data, 
&prev_state); /* only care about real states */ prev_state &= TASK_STATE_MAX - 1; /* end_match_field holds next_pid */ tep_read_number_field(event_data->end_match_field, record->data, &next_pid); task = find_task(h, prev_pid); if (!task) return -1; if (!task->comm) add_task_comm(task, h->switch_prev_comm, record); if (prev_state) task->sleeping = 1; else task->sleeping = 0; /* task is being scheduled out. prev_state tells why */ start = add_start(task, event_data, record, prev_pid, prev_state); task->last_start = start; task->last_event = NULL; task = find_task(h, next_pid); if (!task) return -1; if (!task->comm) add_task_comm(task, h->switch_next_comm, record); /* * If the next task was blocked, it required a wakeup to * restart, and there should be one. * But if it was preempted, we look for the previous sched switch. * Unfortunately, we have to look for both types of events as * we do not know why next_pid scheduled out. * * event_data->start holds the sched_wakeup event data. */ find_and_update_start(task, event_data->start, record->ts, next_pid); /* Look for this task if it was preempted (no wakeup found). */ find_and_update_start(task, event_data, record->ts, next_pid); return 0; } static int handle_stacktrace_event(struct handle_data *h, unsigned long long pid, struct event_data *event_data, struct tep_record *record, int cpu) { struct task_data *orig_task; struct task_data *proxy; struct task_data *task; unsigned long long size; struct event_hash *event_hash; struct start_data *start; void *caller; task = find_task(h, pid); if (!task) return -1; if (task->last_stack) { tracecmd_free_record(task->last_stack); task->last_stack = NULL; } if ((proxy = task->proxy)) { task->proxy = NULL; orig_task = task; task = proxy; } if (!task->last_start && !task->last_event) { /* * Save this stack in case function graph needs it. * Need the original task, not a proxy. 
*/ if (proxy) task = orig_task; tracecmd_record_ref(record); task->last_stack = record; return 0; } /* * start_match_field holds the size. * data_field holds the caller location. */ size = record->size - event_data->data_field->offset; caller = record->data + event_data->data_field->offset; /* * If there's a "start" then don't add the stack until * it finds a matching "end". */ if ((start = task->last_start)) { tracecmd_record_ref(record); start->stack.record = record; start->stack.size = size; start->stack.caller = caller; task->last_start = NULL; task->last_event = NULL; return 0; } event_hash = task->last_event; task->last_event = NULL; add_event_stack(event_hash, caller, size, event_hash->last_time, record->ts); return 0; } static int handle_fgraph_entry_event(struct handle_data *h, unsigned long long pid, struct event_data *event_data, struct tep_record *record, int cpu) { unsigned long long size; struct start_data *start; struct task_data *task; void *caller; task = handle_start_event(h, event_data, record, pid); if (!task) return -1; /* * If a stack trace hasn't been used for a previous task, * then it could be a function trace that we can use for * the function graph. But stack traces come before the function * graph events (unfortunately). So we need to attach the previous * stack trace (if there is one) to this start event. 
*/ if (task->last_stack) { start = task->last_start; record = task->last_stack; size = record->size - stacktrace_event->data_field->offset; caller = record->data + stacktrace_event->data_field->offset; start->stack.record = record; start->stack.size = size; start->stack.caller = caller; task->last_stack = NULL; task->last_event = NULL; } /* Do not map stacks after this event to this event */ task->last_start = NULL; return 0; } static int handle_fgraph_exit_event(struct handle_data *h, unsigned long long pid, struct event_data *event_data, struct tep_record *record, int cpu) { struct task_data *task; task = handle_end_event(h, event_data, record, pid); if (!task) return -1; /* Do not match stacks with function graph exit events */ task->last_event = NULL; return 0; } static int handle_process_exec(struct handle_data *h, unsigned long long pid, struct event_data *event_data, struct tep_record *record, int cpu) { struct task_data *task; unsigned long long val; /* Task has execed, remove the comm for it */ if (event_data->data_field) { tep_read_number_field(event_data->data_field, record->data, &val); pid = val; } task = find_task(h, pid); if (!task) return -1; free(task->comm); task->comm = NULL; return 0; } static int handle_sched_wakeup_event(struct handle_data *h, unsigned long long pid, struct event_data *event_data, struct tep_record *record, int cpu) { struct task_data *proxy; struct task_data *task = NULL; struct start_data *start; unsigned long long success; proxy = find_task(h, pid); if (!proxy) return -1; /* If present, data_field holds "success" */ if (event_data->data_field) { tep_read_number_field(event_data->data_field, record->data, &success); /* If not a successful wakeup, ignore this */ if (!success) return 0; } tep_read_number_field(event_data->pid_field, record->data, &pid); task = find_task(h, pid); if (!task) return -1; if (!task->comm) add_task_comm(task, h->wakeup_comm, record); /* if the task isn't sleeping, then ignore the wake up */ if 
(!task->sleeping) { /* Ignore any following stack traces */ proxy->proxy = NULL; proxy->last_start = NULL; proxy->last_event = NULL; return 0; } /* It's being woken up */ task->sleeping = 0; /* * We need the stack trace to be hooked to the woken up * task, not the waker. */ proxy->proxy = task; /* There should be a blocked schedule out of this task */ find_and_update_start(task, event_data->start, record->ts, pid); /* Set this up for timing how long the wakeup takes */ start = add_start(task, event_data, record, pid, pid); task->last_event = NULL; task->last_start = start; return 0; } void trace_init_profile(struct tracecmd_input *handle, struct hook_list *hook, int global) { struct tep_handle *pevent = tracecmd_get_tep(handle); struct tep_format_field **fields; struct handle_data *h; struct event_data *event_data; struct event_data *sched_switch; struct event_data *sched_wakeup; struct event_data *irq_entry; struct event_data *irq_exit; struct event_data *softirq_entry; struct event_data *softirq_exit; struct event_data *softirq_raise; struct event_data *fgraph_entry; struct event_data *fgraph_exit; struct event_data *syscall_enter; struct event_data *syscall_exit; struct event_data *process_exec; struct event_data *start_event; struct event_data *end_event; struct tep_event **events; int ret; int i; tracecmd_set_show_data_func(handle, trace_profile_record); h = malloc(sizeof(*h)); if (!h) { warning("Could not allocate handle"); return; }; memset(h, 0, sizeof(*h)); h->next = handles; handles = h; trace_hash_init(&h->task_hash, 1024); trace_hash_init(&h->events, 1024); trace_hash_init(&h->group_hash, 512); h->handle = handle; h->pevent = pevent; h->cpus = tracecmd_cpus(handle); /* * For streaming profiling, cpus will not be set up yet. * In this case, we simply use the number of cpus on the * system. 
*/ if (!h->cpus) h->cpus = tracecmd_count_cpus(); list_head_init(&h->migrate_starts); h->cpu_starts = malloc(sizeof(*h->cpu_starts) * h->cpus); if (!h->cpu_starts) goto free_handle; for (i = 0; i < h->cpus; i++) list_head_init(&h->cpu_starts[i]); h->cpu_data = malloc(h->cpus * sizeof(*h->cpu_data)); if (!h->cpu_data) goto free_starts; memset(h->cpu_data, 0, h->cpus * sizeof(h->cpu_data)); h->global_task = malloc(sizeof(struct task_data)); if (!h->global_task) goto free_data; memset(h->global_task, 0, sizeof(struct task_data)); init_task(h, h->global_task); h->global_task->comm = strdup("Global Events"); if (!h->global_task->comm) die("malloc"); h->global_task->pid = -1; h->global_percpu_tasks = calloc(h->cpus, sizeof(struct task_data)); if (!h->global_percpu_tasks) die("malloc"); for (i = 0; i < h->cpus; i++) { init_task(h, &h->global_percpu_tasks[i]); ret = asprintf(&h->global_percpu_tasks[i].comm, "Global CPU[%d] Events", i); if (ret < 0) die("malloc"); h->global_percpu_tasks[i].pid = -1 - i; } irq_entry = add_event(h, "irq", "irq_handler_entry", EVENT_TYPE_IRQ); irq_exit = add_event(h, "irq", "irq_handler_exit", EVENT_TYPE_IRQ); softirq_entry = add_event(h, "irq", "softirq_entry", EVENT_TYPE_SOFTIRQ); softirq_exit = add_event(h, "irq", "softirq_exit", EVENT_TYPE_SOFTIRQ); softirq_raise = add_event(h, "irq", "softirq_raise", EVENT_TYPE_SOFTIRQ_RAISE); sched_wakeup = add_event(h, "sched", "sched_wakeup", EVENT_TYPE_WAKEUP); sched_switch = add_event(h, "sched", "sched_switch", EVENT_TYPE_SCHED_SWITCH); fgraph_entry = add_event(h, "ftrace", "funcgraph_entry", EVENT_TYPE_FUNC); fgraph_exit = add_event(h, "ftrace", "funcgraph_exit", EVENT_TYPE_FUNC); syscall_enter = add_event(h, "raw_syscalls", "sys_enter", EVENT_TYPE_SYSCALL); syscall_exit = add_event(h, "raw_syscalls", "sys_exit", EVENT_TYPE_SYSCALL); process_exec = add_event(h, "sched", "sched_process_exec", EVENT_TYPE_PROCESS_EXEC); stacktrace_event = add_event(h, "ftrace", "kernel_stack", EVENT_TYPE_STACK); if 
(stacktrace_event) { stacktrace_event->handle_event = handle_stacktrace_event; stacktrace_event->data_field = tep_find_field(stacktrace_event->event, "caller"); if (!stacktrace_event->data_field) die("Event: %s does not have field caller", stacktrace_event->event->name); } if (process_exec) { process_exec->handle_event = handle_process_exec; process_exec->data_field = tep_find_field(process_exec->event, "old_pid"); } if (sched_switch) { sched_switch->handle_event = handle_sched_switch_event; sched_switch->data_field = tep_find_field(sched_switch->event, "prev_state"); if (!sched_switch->data_field) die("Event: %s does not have field prev_state", sched_switch->event->name); h->switch_prev_comm = tep_find_field(sched_switch->event, "prev_comm"); if (!h->switch_prev_comm) die("Event: %s does not have field prev_comm", sched_switch->event->name); h->switch_next_comm = tep_find_field(sched_switch->event, "next_comm"); if (!h->switch_next_comm) die("Event: %s does not have field next_comm", sched_switch->event->name); sched_switch->print_func = sched_switch_print; } if (sched_switch && sched_wakeup) { mate_events(h, sched_switch, "prev_pid", "next_pid", sched_wakeup, "pid", 1, 0); mate_events(h, sched_wakeup, "pid", "pid", sched_switch, "prev_pid", 1, 0); sched_wakeup->handle_event = handle_sched_wakeup_event; /* The 'success' field may or may not be present */ sched_wakeup->data_field = tep_find_field(sched_wakeup->event, "success"); h->wakeup_comm = tep_find_field(sched_wakeup->event, "comm"); if (!h->wakeup_comm) die("Event: %s does not have field comm", sched_wakeup->event->name); } if (irq_entry && irq_exit) mate_events(h, irq_entry, NULL, "irq", irq_exit, "irq", 0, global); if (softirq_entry) softirq_entry->print_func = softirq_print; if (softirq_exit) softirq_exit->print_func = softirq_print; if (softirq_raise) softirq_raise->print_func = softirq_print; if (softirq_entry && softirq_exit) mate_events(h, softirq_entry, NULL, "vec", softirq_exit, "vec", 0, global); 
if (softirq_entry && softirq_raise) mate_events(h, softirq_raise, NULL, "vec", softirq_entry, "vec", 0, global); if (fgraph_entry && fgraph_exit) { mate_events(h, fgraph_entry, NULL, "func", fgraph_exit, "func", 1, 0); fgraph_entry->handle_event = handle_fgraph_entry_event; fgraph_exit->handle_event = handle_fgraph_exit_event; fgraph_entry->print_func = func_print; } if (syscall_enter && syscall_exit) { mate_events(h, syscall_enter, NULL, "id", syscall_exit, "id", 1, 0); syscall_enter->print_func = syscall_print; syscall_exit->print_func = syscall_print; } events = tep_list_events(pevent, TEP_EVENT_SORT_ID); if (!events) die("malloc"); /* Add some other events */ event_data = add_event(h, "ftrace", "function", EVENT_TYPE_FUNC); if (event_data) { event_data->data_field = tep_find_field(event_data->event, "ip"); } /* Add any user defined hooks */ for (; hook; hook = hook->next) { start_event = add_event(h, hook->start_system, hook->start_event, EVENT_TYPE_USER_MATE); end_event = add_event(h, hook->end_system, hook->end_event, EVENT_TYPE_USER_MATE); if (!start_event) { warning("Event %s not found", hook->start_event); continue; } if (!end_event) { warning("Event %s not found", hook->end_event); continue; } mate_events(h, start_event, hook->pid, hook->start_match, end_event, hook->end_match, hook->migrate, hook->global); } /* Now add any defined event that we haven't processed */ for (i = 0; events[i]; i++) { event_data = find_event_data(h, events[i]->id); if (event_data) continue; event_data = add_event(h, events[i]->system, events[i]->name, EVENT_TYPE_UNDEFINED); fields = tep_event_fields(events[i]); if (!fields) die("malloc"); if (fields[0]) event_data->data_field = fields[0]; free(fields); } return; free_data: free(h->cpu_data); free_starts: free(h->cpu_starts); free_handle: handles = h->next; free(h); warning("Failed handle allocations"); } static void output_event_stack(struct tep_handle *pevent, struct stack_data *stack) { int longsize = 
tep_get_long_size(pevent); unsigned long long val; const char *func; unsigned long long stop = -1ULL; void *ptr; int i; if (longsize < 8) stop &= (1ULL << (longsize * 8)) - 1; if (stack->count) stack->time_avg = stack->time / stack->count; printf(" %lld total:%lld min:%lld(ts:%lld.%06lld) max:%lld(ts:%lld.%06lld) avg=%lld\n", stack->count, stack->time, stack->time_min, nsecs_per_sec(stack->ts_min), mod_to_usec(stack->ts_min), stack->time_max, nsecs_per_sec(stack->ts_max), mod_to_usec(stack->ts_max), stack->time_avg); for (i = 0; i < stack->size; i += longsize) { ptr = stack->caller + i; switch (longsize) { case 4: /* todo, read value from pevent */ val = *(unsigned int *)ptr; break; case 8: val = *(unsigned long long *)ptr; break; default: die("Strange long size %d", longsize); } if (val == stop) break; func = tep_find_function(pevent, val); if (func) printf(" => %s (0x%llx)\n", func, val); else printf(" => 0x%llx\n", val); } } struct stack_chain { struct stack_chain *children; unsigned long long val; unsigned long long time; unsigned long long time_min; unsigned long long ts_min; unsigned long long time_max; unsigned long long ts_max; unsigned long long time_avg; unsigned long long count; int percent; int nr_children; }; static int compare_chains(const void *a, const void *b) { const struct stack_chain * A = a; const struct stack_chain * B = b; if (A->time > B->time) return -1; if (A->time < B->time) return 1; /* If stacks don't use time, then use count */ if (A->count > B->count) return -1; if (A->count < B->count) return 1; return 0; } static int calc_percent(unsigned long long val, unsigned long long total) { return (val * 100 + total / 2) / total; } static int stack_overflows(struct stack_data *stack, int longsize, int level) { return longsize * level > stack->size - longsize; } static unsigned long long stack_value(struct stack_data *stack, int longsize, int level) { void *ptr; ptr = &stack->caller[longsize * level]; return longsize == 8 ? 
*(u64 *)ptr : *(unsigned *)ptr; } static struct stack_chain * make_stack_chain(struct stack_data **stacks, int cnt, int longsize, int level, int *nr_children) { struct stack_chain *chain; unsigned long long total_time = 0; unsigned long long total_count = 0; unsigned long long time; unsigned long long time_min; unsigned long long ts_min; unsigned long long time_max; unsigned long long ts_max; unsigned long long count; unsigned long long stop = -1ULL; int nr_chains = 0; u64 last = 0; u64 val; int start; int i; int x; if (longsize < 8) stop &= (1ULL << (longsize * 8)) - 1; /* First find out how many diffs there are */ for (i = 0; i < cnt; i++) { if (stack_overflows(stacks[i], longsize, level)) continue; val = stack_value(stacks[i], longsize, level); if (val == stop) continue; if (!nr_chains || val != last) nr_chains++; last = val; } if (!nr_chains) { *nr_children = 0; return NULL; } chain = malloc(sizeof(*chain) * nr_chains); if (!chain) { warning("Could not allocate chain"); return NULL; } memset(chain, 0, sizeof(*chain) * nr_chains); x = 0; count = 0; start = 0; time = 0; time_min = 0; time_max = 0; for (i = 0; i < cnt; i++) { if (stack_overflows(stacks[i], longsize, level)) { start = i+1; continue; } val = stack_value(stacks[i], longsize, level); if (val == stop) { start = i+1; continue; } count += stacks[i]->count; time += stacks[i]->time; if (stacks[i]->time_max > time_max) { time_max = stacks[i]->time_max; ts_max = stacks[i]->ts_max; } if (i == start || stacks[i]->time_min < time_min) { time_min = stacks[i]->time_min; ts_min = stacks[i]->ts_min; } if (i == cnt - 1 || stack_overflows(stacks[i+1], longsize, level) || val != stack_value(stacks[i+1], longsize, level)) { total_time += time; total_count += count; chain[x].val = val; chain[x].time_avg = time / count; chain[x].count = count; chain[x].time = time; chain[x].time_min = time_min; chain[x].ts_min = ts_min; chain[x].time_max = time_max; chain[x].ts_max = ts_max; chain[x].children = 
make_stack_chain(&stacks[start], (i - start) + 1, longsize, level+1, &chain[x].nr_children); x++; start = i + 1; count = 0; time = 0; time_min = 0; time_max = 0; } } qsort(chain, nr_chains, sizeof(*chain), compare_chains); *nr_children = nr_chains; /* Should never happen */ if (!total_time && !total_count) return chain; /* Now calculate percentage */ time = 0; for (i = 0; i < nr_chains; i++) { if (total_time) chain[i].percent = calc_percent(chain[i].time, total_time); /* In case stacks don't have time */ else if (total_count) chain[i].percent = calc_percent(chain[i].count, total_count); } return chain; } static void free_chain(struct stack_chain *chain, int nr_chains) { int i; if (!chain) return; for (i = 0; i < nr_chains; i++) free_chain(chain[i].children, chain[i].nr_children); free(chain); } #define INDENT 5 static void print_indent(int level, unsigned long long mask) { char line; int p; for (p = 0; p < level + 1; p++) { if (mask & (1ULL << p)) line = '|'; else line = ' '; printf("%*c ", INDENT, line); } } static void print_chain_func(struct tep_handle *pevent, struct stack_chain *chain) { unsigned long long val = chain->val; const char *func; func = tep_find_function(pevent, val); if (func) printf("%s (0x%llx)\n", func, val); else printf("0x%llx\n", val); } static void output_chain(struct tep_handle *pevent, struct stack_chain *chain, int level, int nr_chains, unsigned long long *mask) { struct stack_chain *child; int nr_children; int i; char line = '|'; if (!nr_chains) return; *mask |= (1ULL << (level + 1)); print_indent(level + 1, *mask); printf("\n"); for (i = 0; i < nr_chains; i++) { print_indent(level, *mask); printf("%*c ", INDENT, '+'); if (i == nr_chains - 1) { *mask &= ~(1ULL << (level + 1)); line = ' '; } print_chain_func(pevent, &chain[i]); print_indent(level, *mask); printf("%*c ", INDENT, line); printf(" %d%% (%lld)", chain[i].percent, chain[i].count); if (chain[i].time) printf(" time:%lld max:%lld(ts:%lld.%06lld) min:%lld(ts:%lld.%06lld) 
avg:%lld", chain[i].time, chain[i].time_max, nsecs_per_sec(chain[i].ts_max), mod_to_usec(chain[i].ts_max), chain[i].time_min, nsecs_per_sec(chain[i].ts_min), mod_to_usec(chain[i].ts_min), chain[i].time_avg); printf("\n"); for (child = chain[i].children, nr_children = chain[i].nr_children; child && nr_children == 1; nr_children = child->nr_children, child = child->children) { print_indent(level, *mask); printf("%*c ", INDENT, line); printf(" "); print_chain_func(pevent, child); } if (child) output_chain(pevent, child, level+1, nr_children, mask); print_indent(level + 1, *mask); printf("\n"); } *mask &= ~(1ULL << (level + 1)); print_indent(level, *mask); printf("\n"); } static int compare_stacks(const void *a, const void *b) { struct stack_data * const *A = a; struct stack_data * const *B = b; unsigned int sa, sb; int size; int i; /* only compare up to the smaller size of the two */ if ((*A)->size > (*B)->size) size = (*B)->size; else size = (*A)->size; for (i = 0; i < size; i += sizeof(sa)) { sa = *(unsigned *)&(*A)->caller[i]; sb = *(unsigned *)&(*B)->caller[i]; if (sa > sb) return 1; if (sa < sb) return -1; } /* They are the same up to size. 
Then bigger size wins */ if ((*A)->size > (*B)->size) return 1; if ((*A)->size < (*B)->size) return -1; return 0; } static void output_stacks(struct tep_handle *pevent, struct trace_hash *stack_hash) { struct trace_hash_item **bucket; struct trace_hash_item *item; struct stack_data **stacks; struct stack_chain *chain; unsigned long long mask = 0; int nr_chains; int longsize = tep_get_long_size(pevent); int nr_stacks; int i; nr_stacks = 0; trace_hash_for_each_bucket(bucket, stack_hash) { trace_hash_for_each_item(item, bucket) { nr_stacks++; } } stacks = malloc(sizeof(*stacks) * nr_stacks); if (!stacks) { warning("Could not allocate stacks"); return; } nr_stacks = 0; trace_hash_for_each_bucket(bucket, stack_hash) { trace_hash_for_each_item(item, bucket) { stacks[nr_stacks++] = stack_from_item(item); } } qsort(stacks, nr_stacks, sizeof(*stacks), compare_stacks); chain = make_stack_chain(stacks, nr_stacks, longsize, 0, &nr_chains); output_chain(pevent, chain, 0, nr_chains, &mask); if (0) for (i = 0; i < nr_stacks; i++) output_event_stack(pevent, stacks[i]); free(stacks); free_chain(chain, nr_chains); } static void output_event(struct event_hash *event_hash) { struct event_data *event_data = event_hash->event_data; struct tep_handle *pevent = event_data->event->tep; struct trace_seq s; trace_seq_init(&s); if (event_data->print_func) event_data->print_func(&s, event_hash); else if (event_data->type == EVENT_TYPE_FUNC) func_print(&s, event_hash); else trace_seq_printf(&s, "%s:0x%llx", event_data->event->name, event_hash->val); trace_seq_terminate(&s); printf(" Event: %s (%lld)", s.buffer, event_hash->count); trace_seq_destroy(&s); if (event_hash->time_total) { event_hash->time_avg = event_hash->time_total / event_hash->count; printf(" Total: %lld Avg: %lld Max: %lld(ts:%lld.%06lld) Min:%lld(ts:%lld.%06lld)", event_hash->time_total, event_hash->time_avg, event_hash->time_max, nsecs_per_sec(event_hash->ts_max), mod_to_usec(event_hash->ts_max), event_hash->time_min, 
nsecs_per_sec(event_hash->ts_min), mod_to_usec(event_hash->ts_min)); } printf("\n"); output_stacks(pevent, &event_hash->stacks); } static int compare_events(const void *a, const void *b) { struct event_hash * const *A = a; struct event_hash * const *B = b; const struct event_data *event_data_a = (*A)->event_data; const struct event_data *event_data_b = (*B)->event_data; /* Schedule switch goes first */ if (event_data_a->type == EVENT_TYPE_SCHED_SWITCH) { if (event_data_b->type != EVENT_TYPE_SCHED_SWITCH) return -1; /* lower the state the better */ if ((*A)->val > (*B)->val) return 1; if ((*A)->val < (*B)->val) return -1; return 0; } else if (event_data_b->type == EVENT_TYPE_SCHED_SWITCH) return 1; /* Wakeups are next */ if (event_data_a->type == EVENT_TYPE_WAKEUP) { if (event_data_b->type != EVENT_TYPE_WAKEUP) return -1; return 0; } else if (event_data_b->type == EVENT_TYPE_WAKEUP) return 1; if (event_data_a->id > event_data_b->id) return 1; if (event_data_a->id < event_data_b->id) return -1; if ((*A)->time_total > (*B)->time_total) return -1; if ((*A)->time_total < (*B)->time_total) return 1; return 0; } static void output_task(struct handle_data *h, struct task_data *task) { struct trace_hash_item **bucket; struct trace_hash_item *item; struct event_hash **events; const char *comm; int nr_events = 0; int i; if (task->group) return; if (task->comm) comm = task->comm; else comm = tep_data_comm_from_pid(h->pevent, task->pid); if (task->pid < 0) printf("%s\n", task->comm); else printf("\ntask: %s-%d\n", comm, task->pid); trace_hash_for_each_bucket(bucket, &task->event_hash) { trace_hash_for_each_item(item, bucket) { nr_events++; } } events = malloc(sizeof(*events) * nr_events); if (!events) { warning("Could not allocate events"); return; } i = 0; trace_hash_for_each_bucket(bucket, &task->event_hash) { trace_hash_for_each_item(item, bucket) { events[i++] = event_from_item(item); } } qsort(events, nr_events, sizeof(*events), compare_events); for (i = 0; i < nr_events; 
i++) output_event(events[i]); free(events); } static void output_group(struct handle_data *h, struct group_data *group) { struct trace_hash_item **bucket; struct trace_hash_item *item; struct event_hash **events; int nr_events = 0; int i; printf("\ngroup: %s\n", group->comm); trace_hash_for_each_bucket(bucket, &group->event_hash) { trace_hash_for_each_item(item, bucket) { nr_events++; } } events = malloc(sizeof(*events) * nr_events); if (!events) { warning("Could not allocate events"); return; } i = 0; trace_hash_for_each_bucket(bucket, &group->event_hash) { trace_hash_for_each_item(item, bucket) { events[i++] = event_from_item(item); } } qsort(events, nr_events, sizeof(*events), compare_events); for (i = 0; i < nr_events; i++) output_event(events[i]); free(events); } static int compare_tasks(const void *a, const void *b) { struct task_data * const *A = a; struct task_data * const *B = b; if ((*A)->pid > (*B)->pid) return 1; else if ((*A)->pid < (*B)->pid) return -1; return 0; } static int compare_groups(const void *a, const void *b) { const char *A = a; const char *B = b; return strcmp(A, B); } static void free_event_hash(struct event_hash *event_hash) { struct trace_hash_item **bucket; struct trace_hash_item *item; struct stack_data *stack; trace_hash_for_each_bucket(bucket, &event_hash->stacks) { trace_hash_while_item(item, bucket) { stack = stack_from_item(item); trace_hash_del(&stack->hash); free(stack); } } trace_hash_free(&event_hash->stacks); free(event_hash); } static void __free_task(struct task_data *task) { struct trace_hash_item **bucket; struct trace_hash_item *item; struct start_data *start; struct event_hash *event_hash; free(task->comm); trace_hash_for_each_bucket(bucket, &task->start_hash) { trace_hash_while_item(item, bucket) { start = start_from_item(item); if (start->stack.record) tracecmd_free_record(start->stack.record); list_del(&start->list); trace_hash_del(item); free(start); } } trace_hash_free(&task->start_hash); 
trace_hash_for_each_bucket(bucket, &task->event_hash) { trace_hash_while_item(item, bucket) { event_hash = event_from_item(item); trace_hash_del(item); free_event_hash(event_hash); } } trace_hash_free(&task->event_hash); if (task->last_stack) tracecmd_free_record(task->last_stack); } static void free_task(struct task_data *task) { __free_task(task); free(task); } static void free_group(struct group_data *group) { struct trace_hash_item **bucket; struct trace_hash_item *item; struct event_hash *event_hash; free(group->comm); trace_hash_for_each_bucket(bucket, &group->event_hash) { trace_hash_while_item(item, bucket) { event_hash = event_from_item(item); trace_hash_del(item); free_event_hash(event_hash); } } trace_hash_free(&group->event_hash); free(group); } static void show_global_task(struct handle_data *h, struct task_data *task) { if (trace_hash_empty(&task->event_hash)) return; output_task(h, task); } static void output_tasks(struct handle_data *h) { struct trace_hash_item **bucket; struct trace_hash_item *item; struct task_data **tasks; int nr_tasks = 0; int i; trace_hash_for_each_bucket(bucket, &h->task_hash) { trace_hash_for_each_item(item, bucket) { nr_tasks++; } } tasks = malloc(sizeof(*tasks) * nr_tasks); if (!tasks) { warning("Could not allocate tasks"); return; } nr_tasks = 0; trace_hash_for_each_bucket(bucket, &h->task_hash) { trace_hash_while_item(item, bucket) { tasks[nr_tasks++] = task_from_item(item); trace_hash_del(item); } } qsort(tasks, nr_tasks, sizeof(*tasks), compare_tasks); for (i = 0; i < nr_tasks; i++) { output_task(h, tasks[i]); free_task(tasks[i]); } free(tasks); } static void output_groups(struct handle_data *h) { struct trace_hash_item **bucket; struct trace_hash_item *item; struct group_data **groups; int nr_groups = 0; int i; trace_hash_for_each_bucket(bucket, &h->group_hash) { trace_hash_for_each_item(item, bucket) { nr_groups++; } } if (nr_groups == 0) return; groups = malloc(sizeof(*groups) * nr_groups); if (!groups) { 
warning("Could not allocate groups"); return; } nr_groups = 0; trace_hash_for_each_bucket(bucket, &h->group_hash) { trace_hash_while_item(item, bucket) { groups[nr_groups++] = group_from_item(item); trace_hash_del(item); } } qsort(groups, nr_groups, sizeof(*groups), compare_groups); for (i = 0; i < nr_groups; i++) { output_group(h, groups[i]); free_group(groups[i]); } free(groups); } static void output_handle(struct handle_data *h) { int i; show_global_task(h, h->global_task); for (i = 0; i < h->cpus; i++) show_global_task(h, &h->global_percpu_tasks[i]); output_groups(h); output_tasks(h); } static void merge_event_stack(struct event_hash *event, struct stack_data *stack) { struct stack_data *exist; struct trace_hash_item *item; struct stack_match match; match.caller = stack->caller; match.size = stack->size; item = trace_hash_find(&event->stacks, stack->hash.key, match_stack, &match); if (!item) { trace_hash_add(&event->stacks, &stack->hash); return; } exist = stack_from_item(item); exist->count += stack->count; exist->time += stack->time; if (exist->time_max < stack->time_max) { exist->time_max = stack->time_max; exist->ts_max = stack->ts_max; } if (exist->time_min > stack->time_min) { exist->time_min = stack->time_min; exist->ts_min = stack->ts_min; } free(stack); } static void merge_stacks(struct event_hash *exist, struct event_hash *event) { struct stack_data *stack; struct trace_hash_item *item; struct trace_hash_item **bucket; trace_hash_for_each_bucket(bucket, &event->stacks) { trace_hash_while_item(item, bucket) { stack = stack_from_item(item); trace_hash_del(&stack->hash); merge_event_stack(exist, stack); } } } static void merge_event_into_group(struct group_data *group, struct event_hash *event) { struct event_hash *exist; struct trace_hash_item *item; struct event_data_match edata; unsigned long long key; if (event->event_data->type == EVENT_TYPE_WAKEUP) { edata.event_data = event->event_data; event->search_val = 0; event->val = 0; key = 
trace_hash((unsigned long)event->event_data); } else if (event->event_data->type == EVENT_TYPE_SCHED_SWITCH) { edata.event_data = event->event_data; event->search_val = event->val; key = (unsigned long)event->event_data + ((unsigned long)event->val * 2); key = trace_hash(key); } else { key = event->hash.key; } edata.event_data = event->event_data; edata.search_val = event->search_val; edata.val = event->val; item = trace_hash_find(&group->event_hash, key, match_event, &edata); if (!item) { event->hash.key = key; trace_hash_add(&group->event_hash, &event->hash); return; } exist = event_from_item(item); exist->count += event->count; exist->time_total += event->time_total; if (exist->time_max < event->time_max) { exist->time_max = event->time_max; exist->ts_max = event->ts_max; } if (exist->time_min > event->time_min) { exist->time_min = event->time_min; exist->ts_min = event->ts_min; } merge_stacks(exist, event); free_event_hash(event); } static void add_group(struct handle_data *h, struct task_data *task) { unsigned long long key; struct trace_hash_item *item; struct group_data *grp; struct trace_hash_item **bucket; void *data = task->comm; if (!task->comm) return; key = trace_hash_str(task->comm); item = trace_hash_find(&h->group_hash, key, match_group, data); if (item) { grp = group_from_item(item); } else { grp = malloc(sizeof(*grp)); if (!grp) { warning("Could not allocate group"); return; } memset(grp, 0, sizeof(*grp)); grp->comm = strdup(task->comm); if (!grp->comm) die("strdup"); grp->hash.key = key; trace_hash_add(&h->group_hash, &grp->hash); trace_hash_init(&grp->event_hash, 32); } task->group = grp; trace_hash_for_each_bucket(bucket, &task->event_hash) { trace_hash_while_item(item, bucket) { struct event_hash *event_hash; event_hash = event_from_item(item); trace_hash_del(&event_hash->hash); merge_event_into_group(grp, event_hash); } } } static void merge_tasks(struct handle_data *h) { struct trace_hash_item **bucket; struct trace_hash_item *item; if 
/*
 * Output the profile of every handle on the global handles list.
 *
 * For each handle the tasks are first merged by comm when
 * merge_like_comms is set, then the handle is printed and its task hash
 * is freed.
 *
 * Always returns 0.
 */
int do_trace_profile(void)
{
	struct handle_data *h = handles;

	while (h) {
		if (merge_like_comms)
			merge_tasks(h);
		output_handle(h);
		trace_hash_free(&h->task_hash);
		h = h->next;
	}

	return 0;
}
*/ const char *tep_func_repeat_format; static struct filter_str { struct filter_str *next; char *filter; int neg; } *filter_strings; static struct filter_str **filter_next = &filter_strings; struct event_str { struct event_str *next; const char *event; }; struct input_files; struct handle_list { struct list_head list; struct tracecmd_input *handle; struct input_files *input_file; const char *file; int cpus; }; static struct list_head handle_list; struct input_files { struct list_head list; const char *file; struct filter_str *filter_str; struct filter_str **filter_str_next; long long tsoffset; unsigned long long ts2secs; }; static struct list_head input_files; static struct input_files *last_input_file; struct pid_list { struct pid_list *next; char *pid; int free; } *pid_list; struct pid_list *comm_list; static unsigned int page_size; static int input_fd; static const char *default_input_file = DEFAULT_INPUT_FILE; static const char *input_file; static int multi_inputs; static int max_file_size; static int instances; static int *filter_cpus; static int nr_filter_cpus; static int test_filters_mode; static int show_wakeup; static int wakeup_id; static int wakeup_new_id; static int sched_id; static int profile; static int buffer_breaks = 0; static int no_irqs; static int no_softirqs; static int tsdiff; static int tscheck; static int latency_format; static bool raw_format; static const char *format_type = TEP_PRINT_INFO; static struct tep_format_field *wakeup_task; static struct tep_format_field *wakeup_success; static struct tep_format_field *wakeup_new_task; static struct tep_format_field *wakeup_new_success; static struct tep_format_field *sched_task; static struct tep_format_field *sched_prio; static unsigned long long total_wakeup_lat; static unsigned long wakeup_lat_count; static unsigned long long total_wakeup_rt_lat; static unsigned long wakeup_rt_lat_count; struct wakeup_info { struct trace_hash_item hash; unsigned long long start; int pid; }; static struct 
hook_list *hooks; static struct hook_list *last_hook; #define WAKEUP_HASH_SIZE 1024 static struct trace_hash wakeup_hash; static void print_event_name(struct trace_seq *s, struct tep_event *event) { static const char *spaces = " "; /* 20 spaces */ const char *name; int len; name = event ? event->name : "(NULL)"; trace_seq_printf(s, " %s: ", name); /* Space out the event names evenly. */ len = strlen(name); if (len < 20) trace_seq_printf(s, "%.*s", 20 - len, spaces); } enum time_fmt { TIME_FMT_LAT = 1, TIME_FMT_NORMAL, TIME_FMT_TS, }; static const char *time_format(struct tracecmd_input *handle, enum time_fmt tf) { struct tep_handle *tep = tracecmd_get_tep(handle); switch (tf) { case TIME_FMT_LAT: if (latency_format) return "%8.8s-%-5d %3d"; return "%16s-%-5d [%03d]"; default: if (tracecmd_get_flags(handle) & TRACECMD_FL_IN_USECS) { if (tep_test_flag(tep, TEP_NSEC_OUTPUT)) return tf == TIME_FMT_NORMAL ? " %9.1d:" : "%9.1d"; else return tf == TIME_FMT_NORMAL ? " %6.1000d:" : "%6.1000d"; } else return tf == TIME_FMT_NORMAL ? 
"%12d:" : "%12d"; } } static void print_event(struct trace_seq *s, struct tracecmd_input *handle, struct tep_record *record) { struct tep_handle *tep = tracecmd_get_tep(handle); struct tep_event *event; const char *lfmt = time_format(handle, TIME_FMT_LAT); const char *tfmt = time_format(handle, TIME_FMT_NORMAL); event = tep_find_event_by_record(tep, record); tep_print_event(tep, s, record, lfmt, TEP_PRINT_COMM, TEP_PRINT_PID, TEP_PRINT_CPU); tep_print_event(tep, s, record, tfmt, TEP_PRINT_TIME); print_event_name(s, event); tep_print_event(tep, s, record, "%s", format_type); } /* Debug variables for testing tracecmd_read_at */ #define TEST_READ_AT 0 #if TEST_READ_AT #define DO_TEST static off_t test_read_at_offset; static int test_read_at_copy = 100; static int test_read_at_index; static void show_test(struct tracecmd_input *handle) { struct tep_record *record; struct trace_seq s; if (!test_read_at_offset) { printf("\nNO RECORD COPIED\n"); return; } record = tracecmd_read_at(handle, test_read_at_offset, NULL); printf("\nHERE'S THE COPY RECORD\n"); trace_seq_init(&s); print_event(&s, handle, record); trace_seq_do_printf(&s); trace_seq_destroy(&s); printf("\n"); tracecmd_free_record(record); } static void test_save(struct tep_record *record, int cpu) { if (test_read_at_index++ == test_read_at_copy) { test_read_at_offset = record->offset; printf("\nUSING THIS RECORD\n"); } } #endif /* TEST_READ_AT */ /* Debug variables for testing tracecmd_set_cpu_at_timestamp */ #define TEST_AT_TIMESTAMP 0 #if TEST_AT_TIMESTAMP #define DO_TEST static unsigned long long test_at_timestamp_ts; static int test_at_timestamp_copy = 100; static int test_at_timestamp_cpu = -1; static int test_at_timestamp_index; static void show_test(struct tracecmd_input *handle) { struct tep_record *record; struct trace_seq s; int cpu = test_at_timestamp_cpu; if (!test_at_timestamp_ts) { printf("\nNO RECORD COPIED\n"); return; } if (tracecmd_set_cpu_to_timestamp(handle, cpu, test_at_timestamp_ts)) return; 
record = tracecmd_read_data(handle, cpu); printf("\nHERE'S THE COPY RECORD with page %p offset=%p\n", (void *)(record->offset & ~(page_size - 1)), (void *)record->offset); trace_seq_init(&s); print_event(&s, handle, record); trace_seq_do_printf(&s); trace_seq_destroy(&s); printf("\n"); tracecmd_free_record(record); } static void test_save(struct tep_record *record, int cpu) { if (test_at_timestamp_index++ == test_at_timestamp_copy) { test_at_timestamp_ts = record->ts; test_at_timestamp_cpu = cpu; printf("\nUSING THIS RECORD page=%p offset=%p\n", (void *)(record->offset & ~(page_size - 1)), (void *)record->offset); } } #endif /* TEST_AT_TIMESTAMP */ #define TEST_FIRST_LAST 0 #if TEST_FIRST_LAST #define DO_TEST static void show_test(struct tracecmd_input *handle) { struct tep_record *record; struct trace_seq s; int cpu = 0; record = tracecmd_read_cpu_first(handle, cpu); if (!record) { printf("No first record?\n"); return; } printf("\nHERE'S THE FIRST RECORD with offset %p\n", (void *)record->offset); trace_seq_init(&s); print_event(&s, handle, record); trace_seq_do_printf(&s); trace_seq_destroy(&s); printf("\n"); tracecmd_free_record(record); record = tracecmd_read_cpu_last(handle, cpu); if (!record) { printf("No last record?\n"); return; } printf("\nHERE'S THE LAST RECORD with offset %p\n", (void *)record->offset); trace_seq_init(&s); print_event(&s, handle, record); trace_seq_do_printf(&s); trace_seq_destroy(&s); printf("\n"); tracecmd_free_record(record); } static void test_save(struct tep_record *record, int cpu) { } #endif /* TEST_FIRST_LAST */ #ifndef DO_TEST static void show_test(struct tracecmd_input *handle) { /* quiet the compiler */ if (0) print_event(NULL, NULL, NULL); } static void test_save(struct tep_record *record, int cpu) { } #endif static void free_filter_strings(struct filter_str *filter_str) { struct filter_str *filter; while (filter_str) { filter = filter_str; filter_str = filter->next; free(filter->filter); free(filter); } } static struct 
input_files *add_input(const char *file) { struct input_files *item; item = calloc(1, sizeof(*item)); if (!item) die("Failed to allocate for %s", file); item->file = file; item->filter_str_next = &item->filter_str; list_add_tail(&item->list, &input_files); last_input_file = item; return item; } static void add_handle(struct tracecmd_input *handle, struct input_files *input_files) { struct handle_list *item; const char *file = input_files ? input_files->file : input_file; item = calloc(1, sizeof(*item)); if (!item) die("Failed ot allocate for %s", file); item->handle = handle; if (input_files) { item->file = file + strlen(file); /* we want just the base name */ while (item->file >= file && *item->file != '/') item->file--; item->file++; if (strlen(item->file) > max_file_size) max_file_size = strlen(item->file); item->input_file = input_files; } list_add_tail(&item->list, &handle_list); } static void free_inputs(void) { struct input_files *item; while (!list_empty(&input_files)) { item = container_of(input_files.next, struct input_files, list); list_del(&item->list); free_filter_strings(item->filter_str); free(item); } } static void free_handles(void) { struct handle_list *item; while (!list_empty(&handle_list)) { item = container_of(handle_list.next, struct handle_list, list); list_del(&item->list); free(item); } } static void add_filter(struct input_files *input_file, const char *filter, int neg) { struct filter_str *ftr; ftr = malloc(sizeof(*ftr)); if (!ftr) die("Failed to allocate for filter %s", filter); ftr->filter = strdup(filter); if (!ftr->filter) die("malloc"); ftr->next = NULL; ftr->neg = neg; /* must maintain order of command line */ if (input_file) { *input_file->filter_str_next = ftr; input_file->filter_str_next = &ftr->next; } else { *filter_next = ftr; filter_next = &ftr->next; } } static void __add_filter(struct pid_list **head, const char *arg) { struct pid_list *list; char *pids = strdup(arg); char *pid; char *sav; int free = 1; if (!pids) 
/*
 * Append a filter matching @pid (as common_pid, pid or next_pid) to
 * @curr_filter and return the resulting string.
 *
 * With a NULL @curr_filter a new string starting with ".*:" is
 * allocated. Otherwise @curr_filter is reallocated and the new
 * expression is appended after "||"; the old pointer must not be used
 * afterwards.
 *
 * Dies on allocation failure.
 */
static char *append_pid_filter(char *curr_filter, char *pid)
{
	char *out;
	int need, used;

#define FILTER_FMT "(common_pid==" __STR ")||(pid==" __STR ")||(next_pid==" __STR ")"

#undef __STR
#define __STR ""

	/*
	 * With __STR empty this is the length of the fixed text only.
	 * strlen(".*:") > strlen("||"), so it covers both prefixes.
	 */
	need = strlen(".*:" FILTER_FMT) + strlen(pid) * 3 + 1;

#undef __STR
#define __STR "%s"

	if (curr_filter) {
		used = strlen(curr_filter);

		out = realloc(curr_filter, need + used);
		if (!out)
			die("realloc");
		sprintf(out + used, "||" FILTER_FMT, pid, pid, pid);
		return out;
	}

	out = malloc(need);
	if (!out)
		die("Failed to allocate pid filter");
	sprintf(out, ".*:" FILTER_FMT, pid, pid, pid);

	return out;
}
*str = NULL; convert_comm_filter(handle); if (!pid_list) return; /* First do all common pids */ for (list = pid_list; list; list = list->next) { str = append_pid_filter(str, list->pid); } add_filter(input_files, str, 0); free(str); while (pid_list) { list = pid_list; pid_list = pid_list->next; if (list->free) free(list->pid); free(list); } } static int __process_filters(struct tracecmd_input *handle, struct filter_str *filters) { struct tracecmd_filter *trace_filter; for (; filters; filters = filters->next) { trace_filter = tracecmd_filter_add(handle, filters->filter, filters->neg); if (!trace_filter) die("Failed to create event filter: %s", filters->filter); } return !!filters; } static void process_filters(struct handle_list *handles) { struct input_files *input_file = handles->input_file ?: last_input_file; int added = 0; make_pid_filter(handles->handle, input_file); /* * Order of filter processing matters. Apply the global filters * before file-specific ones. */ added += __process_filters(handles->handle, filter_strings); if (input_file) added += __process_filters(handles->handle, input_file->filter_str); if (added && test_filters_mode) exit(0); } static void init_wakeup(struct tracecmd_input *handle) { struct tep_handle *pevent; struct tep_event *event; if (!show_wakeup) return; pevent = tracecmd_get_tep(handle); trace_hash_init(&wakeup_hash, WAKEUP_HASH_SIZE); event = tep_find_event_by_name(pevent, "sched", "sched_wakeup"); if (!event) goto fail; wakeup_id = event->id; wakeup_task = tep_find_field(event, "pid"); if (!wakeup_task) goto fail; wakeup_success = tep_find_field(event, "success"); event = tep_find_event_by_name(pevent, "sched", "sched_switch"); if (!event) goto fail; sched_id = event->id; sched_task = tep_find_field(event, "next_pid"); if (!sched_task) goto fail; sched_prio = tep_find_field(event, "next_prio"); if (!sched_prio) goto fail; wakeup_new_id = -1; event = tep_find_event_by_name(pevent, "sched", "sched_wakeup_new"); if (!event) goto skip; 
wakeup_new_id = event->id; wakeup_new_task = tep_find_field(event, "pid"); if (!wakeup_new_task) goto fail; wakeup_new_success = tep_find_field(event, "success"); skip: return; fail: show_wakeup = 0; } static void add_wakeup(unsigned int val, unsigned long long start) { unsigned int key = trace_hash(val); struct wakeup_info *info; struct trace_hash_item *item; item = trace_hash_find(&wakeup_hash, key, NULL, NULL); if (item) { info = container_of(item, struct wakeup_info, hash); /* Hmm, double wakeup? */ info->start = start; return; } info = malloc(sizeof(*info)); if (!info) die("Failed to allocate wakeup info"); info->hash.key = key; info->start = start; trace_hash_add(&wakeup_hash, &info->hash); } static unsigned long long max_lat = 0; static unsigned long long max_time; static unsigned long long min_lat = -1; static unsigned long long min_time; static unsigned long long max_rt_lat = 0; static unsigned long long max_rt_time; static unsigned long long min_rt_lat = -1; static unsigned long long min_rt_time; static void add_sched(unsigned int val, unsigned long long end, int rt) { struct trace_hash_item *item; unsigned int key = trace_hash(val); struct wakeup_info *info; unsigned long long cal; item = trace_hash_find(&wakeup_hash, key, NULL, NULL); if (!item) return; info = container_of(item, struct wakeup_info, hash); cal = end - info->start; if (cal > max_lat) { max_lat = cal; max_time = end; } if (cal < min_lat) { min_lat = cal; min_time = end; } if (rt) { if (cal > max_rt_lat) { max_rt_lat = cal; max_rt_time = end; } if (cal < min_rt_lat) { min_rt_lat = cal; min_rt_time = end; } } printf(" Latency: %llu.%03llu usecs", cal / 1000, cal % 1000); total_wakeup_lat += cal; wakeup_lat_count++; if (rt) { total_wakeup_rt_lat += cal; wakeup_rt_lat_count++; } trace_hash_del(item); free(info); } static void process_wakeup(struct tep_handle *pevent, struct tep_record *record) { unsigned long long val; int id; if (!show_wakeup) return; id = tep_data_type(pevent, record); if 
/*
 * Print the average, maximum and minimum wakeup latency.
 *
 * @total:    sum of all latencies, in nanoseconds
 * @count:    number of latencies summed in @total
 * @lat_max:  biggest latency, in nanoseconds
 * @time_max: timestamp (nanoseconds) at which @lat_max was seen
 * @lat_min:  smallest latency, in nanoseconds
 * @time_min: timestamp (nanoseconds) at which @lat_min was seen
 *
 * Latencies are shown as usecs with three decimals, timestamps as
 * seconds with the microseconds rounded to nearest. Nothing is printed
 * when @count is zero, as there is no average to show.
 */
static void show_wakeup_timings(unsigned long long total, unsigned long count,
				unsigned long long lat_max, unsigned long long time_max,
				unsigned long long lat_min, unsigned long long time_min)
{
	const unsigned long long nsecs_in_sec = 1000000000ULL;

	if (!count)
		return;

	total /= count;

	/*
	 * Round to the nearest usec before splitting into seconds and
	 * microseconds, so that a carry out of the microseconds is not
	 * lost (1.9999997 secs is 2.000000, not 1.000000).
	 */
	time_max += 500;
	time_min += 500;

	printf("\nAverage wakeup latency: %llu.%03llu usecs\n",
	       total / 1000,
	       total % 1000);
	printf("Maximum Latency: %llu.%03llu usecs at ", lat_max / 1000, lat_max % 1000);
	printf("timestamp: %llu.%06llu\n",
	       time_max / nsecs_in_sec,
	       (time_max % nsecs_in_sec) / 1000);
	printf("Minimum Latency: %llu.%03llu usecs at ", lat_min / 1000, lat_min % 1000);
	printf("timestamp: %llu.%06llu\n\n",
	       time_min / nsecs_in_sec,
	       (time_min % nsecs_in_sec) / 1000);
}
trace_show_data(struct tracecmd_input *handle, struct tep_record *record) { tracecmd_show_data_func func = tracecmd_get_show_data_func(handle); const char *tfmt = time_format(handle, TIME_FMT_NORMAL); const char *cfmt = latency_format ? "%8.8s-%-5d %3d" : "%16s-%-5d [%03d]"; struct tep_handle *pevent; struct tep_event *event; struct trace_seq s; int cpu = record->cpu; bool use_trace_clock; static unsigned long long last_ts; unsigned long long diff_ts; unsigned long page_size; char buf[50]; page_size = tracecmd_page_size(handle); test_save(record, cpu); if (func) { func(handle, record); return; } pevent = tracecmd_get_tep(handle); event = tep_find_event_by_record(pevent, record); use_trace_clock = tracecmd_get_use_trace_clock(handle); trace_seq_init(&s); if (record->missed_events > 0) trace_seq_printf(&s, "CPU:%d [%lld EVENTS DROPPED]\n", cpu, record->missed_events); else if (record->missed_events < 0) trace_seq_printf(&s, "CPU:%d [EVENTS DROPPED]\n", cpu); if (buffer_breaks || tracecmd_get_debug()) { if (tracecmd_record_at_buffer_start(handle, record)) { trace_seq_printf(&s, "CPU:%d [SUBBUFFER START]", cpu); if (tracecmd_get_debug()) trace_seq_printf(&s, " [%lld:0x%llx]", tracecmd_page_ts(handle, record), record->offset & ~(page_size - 1)); trace_seq_putc(&s, '\n'); } } tep_print_event(pevent, &s, record, cfmt, TEP_PRINT_COMM, TEP_PRINT_PID, TEP_PRINT_CPU); if (raw_format) trace_seq_printf(&s, "-0x%x", tep_data_flags(pevent, record)); else tep_print_event(pevent, &s, record, latency_format ? 
"%s" : " %s", TEP_PRINT_LATENCY); tep_print_event(pevent, &s, record, tfmt, TEP_PRINT_TIME); if (tsdiff) { unsigned long long rec_ts = record->ts; buf[0] = 0; if (use_trace_clock && !tep_test_flag(pevent, TEP_NSEC_OUTPUT)) rec_ts = (rec_ts + 500) / 1000; if (last_ts) { diff_ts = rec_ts - last_ts; snprintf(buf, 50, "(+%lld)", diff_ts); buf[49] = 0; } last_ts = rec_ts; trace_seq_printf(&s, " %-8s", buf); } print_event_name(&s, event); tep_print_event(pevent, &s, record, "%s", format_type); if (s.len && *(s.buffer + s.len - 1) == '\n') s.len--; if (tracecmd_get_debug()) { struct kbuffer *kbuf; struct kbuffer_raw_info info; void *page; void *offset; trace_seq_printf(&s, " [%d:0x%llx:%d]", tracecmd_record_ts_delta(handle, record), record->offset & (page_size - 1), record->size); kbuf = tracecmd_record_kbuf(handle, record); page = tracecmd_record_page(handle, record); offset = tracecmd_record_offset(handle, record); if (kbuf && page && offset) { struct kbuffer_raw_info *pi = &info; /* We need to get the record raw data to get next */ pi->next = offset; pi = kbuffer_raw_get(kbuf, page, pi); while ((pi = kbuffer_raw_get(kbuf, page, pi))) { if (pi->type < KBUFFER_TYPE_PADDING) break; switch (pi->type) { case KBUFFER_TYPE_PADDING: trace_seq_printf(&s, "\n PADDING: "); break; case KBUFFER_TYPE_TIME_EXTEND: trace_seq_printf(&s, "\n TIME EXTEND: "); break; case KBUFFER_TYPE_TIME_STAMP: trace_seq_printf(&s, "\n TIME STAMP: "); break; } if (pi->type == KBUFFER_TYPE_TIME_STAMP) trace_seq_printf(&s, "timestamp:%lld length:%d", pi->delta, pi->length); else trace_seq_printf(&s, "delta:%lld length:%d", pi->delta, pi->length); } } } trace_seq_do_printf(&s); trace_seq_destroy(&s); process_wakeup(pevent, record); printf("\n"); } static void read_latency(struct tracecmd_input *handle) { char *buf = NULL; size_t size = 0; int r; do { r = tracecmd_latency_data_read(handle, &buf, &size); if (r > 0) printf("%.*s", r, buf); } while (r > 0); printf("\n"); free(buf); } static int 
test_filters(struct tep_handle *pevent, struct tep_record *record) { int ret = FILTER_NONE; int flags; if (no_irqs || no_softirqs) { flags = tep_data_flags(pevent, record); if (no_irqs && (flags & TRACE_FLAG_HARDIRQ)) return FILTER_MISS; if (no_softirqs && (flags & TRACE_FLAG_SOFTIRQ)) return FILTER_MISS; } return ret; } struct stack_info_cpu { int cpu; int last_printed; }; struct stack_info { struct stack_info *next; struct handle_list *handles; struct stack_info_cpu *cpus; int nr_cpus; }; static void print_handle_file(struct handle_list *handles) { /* Only print file names if more than one file is read */ if (!multi_inputs && !instances) return; if (handles->file && *handles->file != '\0') printf("%*s: ", max_file_size, handles->file); else printf("%*s ", max_file_size, ""); } static bool skip_record(struct handle_list *handles, struct tep_record *record, int cpu) { struct tep_handle *tep; bool found = false; int ret; tep = tracecmd_get_tep(handles->handle); if (filter_cpus) { int i; for (i = 0; filter_cpus[i] >= 0; i++) { if (filter_cpus[i] == cpu) { found = true; break; } } if (!found) return true; found = false; } ret = test_filters(tep, record); switch (ret) { case FILTER_NOEXIST: break; case FILTER_NONE: case FILTER_MATCH: /* Test the negative filters (-v) */ ret = test_filters(tep, record); if (ret != FILTER_MATCH) { found = true; break; } } return !found; } struct kvm_cpu_map { struct tracecmd_input *guest_handle; int guest_vcpu; int host_pid; }; static struct kvm_cpu_map *vcpu_maps; static int nr_vcpu_maps; static int cmp_map(const void *A, const void *B) { const struct kvm_cpu_map *a = A; const struct kvm_cpu_map *b = B; if (a->host_pid < b->host_pid) return -1; return a->host_pid > b->host_pid; } static void map_vcpus(struct tracecmd_input **handles, int nr_handles) { struct tracecmd_input *host_handle = handles[0]; unsigned long long traceid; struct kvm_cpu_map *map; const int *cpu_pids; const char *name; int vcpu_count; int ret; int i, k; for (i = 1; 
i < nr_handles; i++) { traceid = tracecmd_get_traceid(handles[i]); ret = tracecmd_get_guest_cpumap(host_handle, traceid, &name, &vcpu_count, &cpu_pids); if (ret) continue; map = realloc(vcpu_maps, sizeof(*map) * (nr_vcpu_maps + vcpu_count)); if (!map) die("Could not allocate vcpu maps"); vcpu_maps = map; map += nr_vcpu_maps; nr_vcpu_maps += vcpu_count; for (k = 0; k < vcpu_count; k++) { map[k].guest_handle = handles[i]; map[k].guest_vcpu = k; map[k].host_pid = cpu_pids[k]; } } if (!vcpu_maps) return; qsort(vcpu_maps, nr_vcpu_maps, sizeof(*map), cmp_map); } const char *tep_plugin_kvm_get_func(struct tep_event *event, struct tep_record *record, unsigned long long *val) { struct tep_handle *tep; struct kvm_cpu_map *map; struct kvm_cpu_map key; unsigned long long rip = *val; const char *func; int pid; if (!vcpu_maps || !nr_vcpu_maps) return NULL; /* * A kvm event is referencing an address of the guest. * get the PID of this event, and then find which guest * it belongs to. Then return the function name from that guest's * handle. 
*/ pid = tep_data_pid(event->tep, record); key.host_pid = pid; map = bsearch(&key, vcpu_maps, nr_vcpu_maps, sizeof(*vcpu_maps), cmp_map); if (!map) return NULL; tep = tracecmd_get_tep(map->guest_handle); func = tep_find_function(tep, rip); if (func) *val = tep_find_function_address(tep, rip); return func; } static int process_record(struct tracecmd_input *handle, struct tep_record *record, int cpu, void *data) { struct handle_list *handles = tracecmd_get_private(handle); unsigned long long *last_timestamp = data; if (skip_record(handles, record, cpu)) return 0; if (tscheck && *last_timestamp > record->ts) { errno = 0; warning("WARNING: Record on cpu %d went backwards: %lld to %lld delta: -%lld\n", cpu, *last_timestamp, record->ts, *last_timestamp - record->ts); } *last_timestamp = record->ts; print_handle_file(handles); trace_show_data(handle, record); return 0; } enum output_type { OUTPUT_NORMAL, OUTPUT_STAT_ONLY, OUTPUT_UNAME_ONLY, OUTPUT_VERSION_ONLY, }; static void read_data_info(struct list_head *handle_list, enum output_type otype, int global, int align_ts) { unsigned long long ts, first_ts; struct handle_list *handles; struct tracecmd_input **handle_array; unsigned long long last_timestamp = 0; int nr_handles = 0; int first = 1; int ret; list_for_each_entry(handles, handle_list, list) { int cpus; nr_handles++; if (!tracecmd_is_buffer_instance(handles->handle)) { ret = tracecmd_init_data(handles->handle); if (ret < 0) die("failed to init data"); } cpus = tracecmd_cpus(handles->handle); handles->cpus = cpus; process_filters(handles); /* Don't process instances that we added here */ if (tracecmd_is_buffer_instance(handles->handle)) continue; if (align_ts) { ts = tracecmd_get_first_ts(handles->handle); if (first || first_ts > ts) first_ts = ts; first = 0; } print_handle_file(handles); printf("cpus=%d\n", cpus); /* Latency trace is just all ASCII */ if (ret > 0) { if (multi_inputs) die("latency traces do not work with multiple inputs"); 
read_latency(handles->handle); return; } switch (otype) { case OUTPUT_NORMAL: break; case OUTPUT_STAT_ONLY: printf("\nKernel buffer statistics:\n" " Note: \"entries\" are the entries left in the kernel ring buffer and are not\n" " recorded in the trace data. They should all be zero.\n\n"); tracecmd_print_stats(handles->handle); continue; case OUTPUT_UNAME_ONLY: tracecmd_print_uname(handles->handle); case OUTPUT_VERSION_ONLY: tracecmd_print_version(handles->handle); continue; } init_wakeup(handles->handle); if (last_hook) last_hook->next = tracecmd_hooks(handles->handle); else hooks = tracecmd_hooks(handles->handle); if (profile) trace_init_profile(handles->handle, hooks, global); /* If this file has buffer instances, get the handles for them */ instances = tracecmd_buffer_instances(handles->handle); if (instances) { struct tracecmd_input *new_handle; struct input_files *file_input; const char *save_name; const char *name; int i; file_input = handles->input_file; for (i = 0; i < instances; i++) { name = tracecmd_buffer_instance_name(handles->handle, i); if (!name) die("error in reading buffer instance"); new_handle = tracecmd_buffer_instance_handle(handles->handle, i); if (!new_handle) { warning("could not retrieve handle %s", name); continue; } if (file_input) { save_name = file_input->file; file_input->file = name; } else { save_name = NULL; file_input = add_input(name); } add_handle(new_handle, file_input); if (save_name) file_input->file = save_name; } } } if (otype != OUTPUT_NORMAL) return; if (align_ts) { list_for_each_entry(handles, handle_list, list) { tracecmd_add_ts_offset(handles->handle, -first_ts); } } handle_array = calloc(nr_handles, sizeof(*handle_array)); if (!handle_array) die("Could not allocate memory for handle list"); nr_handles = 0; list_for_each_entry(handles, handle_list, list) { tracecmd_set_private(handles->handle, handles); handle_array[nr_handles++] = handles->handle; } map_vcpus(handle_array, nr_handles); 
tracecmd_iterate_events_multi(handle_array, nr_handles, process_record, &last_timestamp); free(handle_array); if (profile) do_trace_profile(); list_for_each_entry(handles, handle_list, list) { show_test(handles->handle); } } struct tracecmd_input *read_trace_header(const char *file, int flags) { input_fd = open(file, O_RDONLY); if (input_fd < 0) die("opening '%s'\n", file); return tracecmd_alloc_fd(input_fd, flags); } static void sig_end(int sig) { struct handle_list *handles; fprintf(stderr, "trace-cmd: Received SIGINT\n"); list_for_each_entry(handles, &handle_list, list) { tracecmd_close(handles->handle); } exit(0); } static const char *skip_space_and_test_digit(const char *p, const char *cpu_str) { while (isspace(*p)) p++; if (!isdigit(*p)) die("invalid character '%c' in cpu string '%s'", *p, cpu_str); return p; } static void __add_cpu(int cpu) { filter_cpus = tracecmd_add_id(filter_cpus, cpu, nr_filter_cpus++); } static void parse_cpulist(const char *cpu_str) { unsigned a, b; const char *s = cpu_str; do { s = skip_space_and_test_digit(s, cpu_str); b = a = strtoul(s, (char **)&s, 10); if (*s == '-') { s = skip_space_and_test_digit(s + 1, cpu_str); b = strtoul(s, (char **)&s, 10); } if (!(a <= b)) die("range of cpu numbers must be lower to greater"); while (a <= b) { __add_cpu(a); a++; } if (*s == ',' || *s == ':') s++; } while (*s != '\0'); } static void read_file_fd(int fd, char *dst, int len) { size_t size = 0; int r; do { r = read(fd, dst+size, len); if (r > 0) { size += r; len -= r; } } while (r > 0); } static void add_functions(struct tep_handle *pevent, const char *file) { struct stat st; char *buf; int ret; int fd; fd = open(file, O_RDONLY); if (fd < 0) die("Can't read file %s", file); ret = fstat(fd, &st); if (ret < 0) die("Can't stat file %s", file); buf = malloc(st.st_size + 1); if (!buf) die("Failed to allocate for function buffer"); read_file_fd(fd, buf, st.st_size); buf[st.st_size] = '\0'; close(fd); tep_parse_kallsyms(pevent, buf); free(buf); } 
static void process_plugin_option(char *option) { char *name = option; char *val = NULL; char *p; if ((p = strstr(name, "="))) { *p = '\0'; val = p+1; } tep_plugin_add_option(name, val); } static void set_event_flags(struct tep_handle *pevent, struct event_str *list, unsigned int flag) { struct tep_event **events; struct tep_event *event; struct event_str *str; regex_t regex; int ret; int i; if (!list) return; events = tep_list_events(pevent, 0); for (str = list; str; str = str->next) { char *match; match = malloc(strlen(str->event) + 3); if (!match) die("Failed to allocate for match string '%s'", str->event); sprintf(match, "^%s$", str->event); ret = regcomp(®ex, match, REG_ICASE|REG_NOSUB); if (ret < 0) die("Can't parse '%s'", str->event); free(match); for (i = 0; events[i]; i++) { event = events[i]; if (!regexec(®ex, event->name, 0, NULL, 0) || !regexec(®ex, event->system, 0, NULL, 0)) event->flags |= flag; } } } static void show_event_ts(struct tracecmd_input *handle, struct tep_record *record) { const char *tfmt = time_format(handle, TIME_FMT_TS); struct tep_handle *tep = tracecmd_get_tep(handle); struct trace_seq s; trace_seq_init(&s); tep_print_event(tep, &s, record, tfmt, TEP_PRINT_TIME); printf("%s", s.buffer); trace_seq_destroy(&s); } static void add_hook(const char *arg) { struct hook_list *hook; hook = tracecmd_create_event_hook(arg); hook->next = hooks; hooks = hook; if (!last_hook) last_hook = hook; } static void add_first_input(const char *input_file, long long tsoffset) { struct input_files *item; /* Copy filter strings to this input file */ item = add_input(input_file); item->filter_str = filter_strings; if (filter_strings) item->filter_str_next = filter_next; else item->filter_str_next = &item->filter_str; /* Copy the tsoffset to this input file */ item->tsoffset = tsoffset; } enum { OPT_verbose = 234, OPT_align_ts = 235, OPT_raw_ts = 236, OPT_version = 237, OPT_tscheck = 238, OPT_tsdiff = 239, OPT_ts2secs = 240, OPT_tsoffset = 241, OPT_bycomm = 
242, OPT_debug = 243, OPT_uname = 244, OPT_profile = 245, OPT_event = 246, OPT_comm = 247, OPT_boundary = 248, OPT_stat = 249, OPT_pid = 250, OPT_nodate = 251, OPT_check_event_parsing = 252, OPT_kallsyms = 253, OPT_events = 254, OPT_cpu = 255, OPT_cpus = 256, OPT_first = 257, OPT_last = 258, }; void trace_report (int argc, char **argv) { struct tracecmd_input *handle; struct tep_handle *pevent; struct event_str *raw_events = NULL; struct event_str *nohandler_events = NULL; struct event_str **raw_ptr = &raw_events; struct event_str **nohandler_ptr = &nohandler_events; const char *functions = NULL; const char *print_event = NULL; struct input_files *inputs; struct handle_list *handles; enum output_type otype; long long tsoffset = 0; unsigned long long ts2secs = 0; unsigned long long ts2sc; int open_flags = 0; int show_stat = 0; int show_funcs = 0; int show_endian = 0; int show_page_size = 0; int show_printk = 0; int show_uname = 0; int show_version = 0; int show_events = 0; int show_cpus = 0; int show_first = 0; int show_last = 0; int print_events = 0; int nanosec = 0; int no_date = 0; int raw_ts = 0; int align_ts = 0; int global = 0; int neg = 0; int ret = 0; int check_event_parsing = 0; int c; list_head_init(&handle_list); list_head_init(&input_files); if (argc < 2) usage(argv); if (strcmp(argv[1], "report") != 0) usage(argv); signal(SIGINT, sig_end); trace_set_loglevel(TEP_LOG_ERROR); for (;;) { int option_index = 0; static struct option long_options[] = { {"cpu", required_argument, NULL, OPT_cpu}, {"cpus", no_argument, NULL, OPT_cpus}, {"events", no_argument, NULL, OPT_events}, {"event", required_argument, NULL, OPT_event}, {"filter-test", no_argument, NULL, 'T'}, {"first-event", no_argument, NULL, OPT_first}, {"kallsyms", required_argument, NULL, OPT_kallsyms}, {"pid", required_argument, NULL, OPT_pid}, {"comm", required_argument, NULL, OPT_comm}, {"check-events", no_argument, NULL, OPT_check_event_parsing}, {"nodate", no_argument, NULL, OPT_nodate}, {"stat", 
no_argument, NULL, OPT_stat}, {"boundary", no_argument, NULL, OPT_boundary}, {"debug", no_argument, NULL, OPT_debug}, {"last-event", no_argument, NULL, OPT_last}, {"profile", no_argument, NULL, OPT_profile}, {"uname", no_argument, NULL, OPT_uname}, {"version", no_argument, NULL, OPT_version}, {"by-comm", no_argument, NULL, OPT_bycomm}, {"ts-offset", required_argument, NULL, OPT_tsoffset}, {"ts2secs", required_argument, NULL, OPT_ts2secs}, {"ts-diff", no_argument, NULL, OPT_tsdiff}, {"ts-check", no_argument, NULL, OPT_tscheck}, {"raw-ts", no_argument, NULL, OPT_raw_ts}, {"align-ts", no_argument, NULL, OPT_align_ts}, {"verbose", optional_argument, NULL, OPT_verbose}, {"help", no_argument, NULL, '?'}, {NULL, 0, NULL, 0} }; c = getopt_long (argc-1, argv+1, "+hSIi:H:feGpRr:tPNn:LlEwF:V::vTqO:", long_options, &option_index); if (c == -1) break; switch (c) { case 'h': usage(argv); break; case 'i': if (input_file) { multi_inputs++; add_input(optarg); } else { input_file = optarg; add_first_input(input_file, tsoffset); } break; case 'F': add_filter(last_input_file, optarg, neg); break; case 'H': add_hook(optarg); break; case 'T': test_filters_mode = 1; break; case 'f': show_funcs = 1; break; case 'I': no_irqs = 1; break; case 'S': no_softirqs = 1; break; case 'P': show_printk = 1; break; case 'L': open_flags |= TRACECMD_FL_LOAD_NO_SYSTEM_PLUGINS; break; case 'N': open_flags |= TRACECMD_FL_LOAD_NO_PLUGINS; break; case 'n': *nohandler_ptr = malloc(sizeof(struct event_str)); if (!*nohandler_ptr) die("Failed to allocate for '-n %s'", optarg); (*nohandler_ptr)->event = optarg; (*nohandler_ptr)->next = NULL; nohandler_ptr = &(*nohandler_ptr)->next; break; case 'e': show_endian = 1; break; case 'p': show_page_size = 1; break; case 'E': show_events = 1; break; case 'G': global = 1; break; case 'R': raw_format = true; break; case 'r': *raw_ptr = malloc(sizeof(struct event_str)); if (!*raw_ptr) die("Failed to allocate '-r %s'", optarg); (*raw_ptr)->event = optarg; (*raw_ptr)->next = 
NULL; raw_ptr = &(*raw_ptr)->next; break; case 't': nanosec = 1; break; case 'w': show_wakeup = 1; break; case 'l': latency_format = 1; break; case 'O': process_plugin_option(optarg); break; case 'v': if (neg) die("Only 1 -v can be used"); neg = 1; break; case 'q': silence_warnings = 1; tracecmd_set_loglevel(TEP_LOG_NONE); break; case OPT_cpu: parse_cpulist(optarg); break; case OPT_cpus: show_cpus = 1; break; case OPT_events: print_events = 1; break; case OPT_event: print_event = optarg; break; case OPT_kallsyms: functions = optarg; break; case OPT_pid: add_pid_filter(optarg); break; case OPT_comm: add_comm_filter(optarg); break; case OPT_check_event_parsing: check_event_parsing = 1; break; case OPT_nodate: no_date = 1; break; case OPT_stat: show_stat = 1; break; case OPT_first: show_first = 1; show_cpus = 1; break; case OPT_last: show_last = 1; show_cpus = 1; break; case OPT_boundary: /* Debug to look at buffer breaks */ buffer_breaks = 1; break; case OPT_debug: buffer_breaks = 1; tracecmd_set_debug(true); break; case OPT_profile: profile = 1; break; case OPT_uname: show_uname = 1; break; case OPT_version: show_version = 1; break; case OPT_bycomm: trace_profile_set_merge_like_comms(); break; case OPT_ts2secs: ts2sc = atoll(optarg); if (multi_inputs) last_input_file->ts2secs = ts2sc; else ts2secs = ts2sc; break; case OPT_tsoffset: tsoffset = atoll(optarg); if (multi_inputs) last_input_file->tsoffset = tsoffset; if (!input_file) die("--ts-offset must come after -i"); break; case OPT_tsdiff: tsdiff = 1; break; case OPT_tscheck: tscheck = 1; break; case OPT_raw_ts: raw_ts = 1; break; case OPT_align_ts: align_ts = 1; break; case 'V': case OPT_verbose: show_status = 1; if (trace_set_verbose(optarg) < 0) die("invalid verbose level %s", optarg); break; default: usage(argv); } } if ((argc - optind) >= 2) { if (input_file) usage(argv); input_file = argv[optind + 1]; add_first_input(input_file, tsoffset); for (int i = optind + 2; i < argc; i++) { multi_inputs++; 
add_input(argv[i]); } } if (!multi_inputs) { if (!input_file) { input_file = default_input_file; add_first_input(input_file, tsoffset); } } else if (show_wakeup) die("Wakeup tracing can only be done on a single input file"); list_for_each_entry(inputs, &input_files, list) { handle = read_trace_header(inputs->file, open_flags); if (!handle) die("error reading header for %s", inputs->file); /* If used with instances, top instance will have no tag */ add_handle(handle, multi_inputs ? inputs : NULL); if (no_date) tracecmd_set_flag(handle, TRACECMD_FL_IGNORE_DATE); if (raw_ts) tracecmd_set_flag(handle, TRACECMD_FL_RAW_TS); page_size = tracecmd_page_size(handle); if (show_page_size) { printf("file page size is %d, and host page size is %d\n", page_size, getpagesize()); return; } if (inputs->tsoffset) tracecmd_set_ts_offset(handle, inputs->tsoffset); if (inputs->ts2secs) tracecmd_set_ts2secs(handle, inputs->ts2secs); else if (ts2secs) tracecmd_set_ts2secs(handle, ts2secs); pevent = tracecmd_get_tep(handle); if (nanosec) tep_set_flag(pevent, TEP_NSEC_OUTPUT); if (raw_format) format_type = TEP_PRINT_INFO_RAW; if (test_filters_mode) tep_set_test_filters(pevent, 1); if (functions) add_functions(pevent, functions); if (show_endian) { printf("file is %s endian and host is %s endian\n", tep_is_file_bigendian(pevent) ? "big" : "little", tep_is_local_bigendian(pevent) ? 
"big" : "little"); return; } if (print_events) { tracecmd_print_events(handle, NULL); return; } if (print_event) { tracecmd_print_events(handle, print_event); return; } ret = tracecmd_read_headers(handle, 0); if (check_event_parsing) { if (ret || tracecmd_get_parsing_failures(handle)) exit(EINVAL); else exit(0); } else { if (ret) return; } if (show_funcs) { tep_print_funcs(pevent); return; } if (show_printk) { tep_print_printk(pevent); return; } if (show_events) { struct tep_event **events; struct tep_event *event; int i; events = tep_list_events(pevent, TEP_EVENT_SORT_SYSTEM); for (i = 0; events[i]; i++) { event = events[i]; if (event->system) printf("%s:", event->system); printf("%s\n", event->name); } return; } if (show_cpus) { struct tep_record *record; int cpus; int ret; int i; if (!tracecmd_is_buffer_instance(handle)) { ret = tracecmd_init_data(handle); if (ret < 0) die("failed to init data"); } cpus = tracecmd_cpus(handle); printf("List of CPUs in %s with data:\n", inputs->file); for (i = 0; i < cpus; i++) { if ((record = tracecmd_read_cpu_first(handle, i))) { printf(" %d", i); if (show_first) { printf("\tFirst event:"); show_event_ts(handle, record); } if (show_last) { tracecmd_free_record(record); record = tracecmd_read_cpu_last(handle, i); if (record) { printf("\tLast event:"); show_event_ts(handle, record); } } tracecmd_free_record(record); printf("\n"); } } continue; } set_event_flags(pevent, nohandler_events, TEP_EVENT_FL_NOHANDLE); set_event_flags(pevent, raw_events, TEP_EVENT_FL_PRINTRAW); } if (show_cpus) return; otype = OUTPUT_NORMAL; if (tracecmd_get_flags(handle) & TRACECMD_FL_RAW_TS) { tep_func_repeat_format = "%d"; } else if (tracecmd_get_flags(handle) & TRACECMD_FL_IN_USECS) { if (tep_test_flag(tracecmd_get_tep(handle), TEP_NSEC_OUTPUT)) tep_func_repeat_format = "%9.1d"; else tep_func_repeat_format = "%6.1000d"; } else { tep_func_repeat_format = "%12d"; } if (show_stat) otype = OUTPUT_STAT_ONLY; /* yeah yeah, uname overrides stat */ if 
(show_uname) otype = OUTPUT_UNAME_ONLY; /* and version overrides uname! */ if (show_version) otype = OUTPUT_VERSION_ONLY; read_data_info(&handle_list, otype, global, align_ts); list_for_each_entry(handles, &handle_list, list) { tracecmd_close(handles->handle); } free_handles(); free_inputs(); finish_wakeup(); return; } trace-cmd-v3.3.1/tracecmd/trace-record.c000066400000000000000000005217741470231550600200660ustar00rootroot00000000000000// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2008, 2009, 2010 Red Hat Inc, Steven Rostedt * * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef NO_PTRACE #include #else #ifdef WARN_NO_PTRACE #warning ptrace not supported. -c feature will not work #endif #endif #include #include #include #include #include #include #include #include #include #include #include #include #include "tracefs.h" #include "version.h" #include "trace-local.h" #include "trace-msg.h" #define _STR(x) #x #define STR(x) _STR(x) #define RECORD_PIDFILE "trace-cmd-record.pid" #define TRACE_CTRL "tracing_on" #define TRACE "trace" #define AVAILABLE "available_tracers" #define CURRENT "current_tracer" #define ITER_CTRL "trace_options" #define MAX_LATENCY "tracing_max_latency" #define STAMP "stamp" #define FUNC_STACK_TRACE "func_stack_trace" #define TSC_CLOCK "x86-tsc" #define dprint(fmt, ...) 
tracecmd_debug(fmt, ##__VA_ARGS__) enum trace_type { TRACE_TYPE_RECORD = 1, TRACE_TYPE_START = (1 << 1), TRACE_TYPE_STREAM = (1 << 2), TRACE_TYPE_EXTRACT = (1 << 3), TRACE_TYPE_SET = (1 << 4), }; static tracecmd_handle_init_func handle_init = NULL; static int rt_prio; static int keep; static int latency; static long sleep_time = 1000; static int recorder_threads; static struct pid_record_data *pids; static int buffers; /* Clear all function filters */ static int clear_function_filters; static bool no_fifos; static char *host; static const char *gai_err; static bool quiet; static bool fork_process; static bool do_daemonize; static bool created_pidfile; /* Max size to let a per cpu file get */ static int max_kb; static int do_ptrace; static int filter_task; static bool no_filter = false; static int local_cpu_count; static int finished; /* setting of /proc/sys/kernel/ftrace_enabled */ static int fset; static unsigned recorder_flags; /* Try a few times to get an accurate date */ static int date2ts_tries = 50; static struct func_list *graph_funcs; static int func_stack; static int save_stdout = -1; static struct hook_list *hooks; struct event_list { struct event_list *next; const char *event; char *trigger; char *filter; char *pid_filter; char *filter_file; char *trigger_file; char *enable_file; int neg; }; struct tracecmd_event_list *listed_events; struct events { struct events *sibling; struct events *children; struct events *next; char *name; }; /* Files to be reset when done recording */ struct reset_file { struct reset_file *next; char *path; char *reset; int prio; }; static struct reset_file *reset_files; /* Triggers need to be cleared in a special way */ static struct reset_file *reset_triggers; struct buffer_instance top_instance; struct buffer_instance *buffer_instances; struct buffer_instance *first_instance; static struct tracecmd_recorder *recorder; static int ignore_event_not_found = 0; static inline int is_top_instance(struct buffer_instance *instance) { 
return instance == &top_instance; } static inline int no_top_instance(void) { return first_instance != &top_instance; } static void init_instance(struct buffer_instance *instance) { instance->event_next = &instance->events; } enum { RESET_DEFAULT_PRIO = 0, RESET_HIGH_PRIO = 100000, }; enum trace_cmd { CMD_extract, CMD_start, CMD_stream, CMD_profile, CMD_record, CMD_record_agent, CMD_set, }; struct common_record_context { enum trace_cmd curr_cmd; struct buffer_instance *instance; const char *output; const char *temp; char *date2ts; char *user; const char *clock; const char *compression; struct tsc_nsec tsc2nsec; int data_flags; int tsync_loop_interval; int record_all; int total_disable; int disable; int events; int global; int filtered; int date; int manual; int topt; int run_command; int saved_cmdlines_size; int file_version; }; static void add_reset_file(const char *file, const char *val, int prio) { struct reset_file *reset; struct reset_file **last = &reset_files; /* Only reset if we are not keeping the state */ if (keep) return; reset = malloc(sizeof(*reset)); if (!reset) die("Failed to allocate reset"); reset->path = strdup(file); reset->reset = strdup(val); reset->prio = prio; if (!reset->path || !reset->reset) die("Failed to allocate reset path or val"); while (*last && (*last)->prio > prio) last = &(*last)->next; reset->next = *last; *last = reset; } static void add_reset_trigger(const char *file) { struct reset_file *reset; /* Only reset if we are not keeping the state */ if (keep) return; reset = malloc(sizeof(*reset)); if (!reset) die("Failed to allocate reset"); reset->path = strdup(file); reset->next = reset_triggers; reset_triggers = reset; } /* To save the contents of the file */ static void reset_save_file(const char *file, int prio) { char *content; content = get_file_content(file); if (content) { add_reset_file(file, content, prio); free(content); } } /* * @file: the file to check * @nop: If the content of the file is this, use the reset value * 
@reset: What to write if the file == @nop */ static void reset_save_file_cond(const char *file, int prio, const char *nop, const char *reset) { char *content; char *cond; if (keep) return; content = get_file_content(file); cond = strstrip(content); if (strcmp(cond, nop) == 0) add_reset_file(file, reset, prio); else add_reset_file(file, content, prio); free(content); } /** * add_instance - add a buffer instance to the internal list * @instance: The buffer instance to add */ void add_instance(struct buffer_instance *instance, int cpu_count) { init_instance(instance); instance->next = buffer_instances; if (first_instance == buffer_instances) first_instance = instance; buffer_instances = instance; instance->cpu_count = cpu_count; buffers++; } static void instance_reset_file_save(struct buffer_instance *instance, char *file, int prio) { char *path; path = tracefs_instance_get_file(instance->tracefs, file); if (path) reset_save_file(path, prio); tracefs_put_tracing_file(path); } static void test_set_event_pid(struct buffer_instance *instance) { static int have_set_event_pid; static int have_event_fork; static int have_func_fork; if (!have_set_event_pid && tracefs_file_exists(top_instance.tracefs, "set_event_pid")) have_set_event_pid = 1; if (!have_event_fork && tracefs_file_exists(top_instance.tracefs, "options/event-fork")) have_event_fork = 1; if (!have_func_fork && tracefs_file_exists(top_instance.tracefs, "options/function-fork")) have_func_fork = 1; if (!instance->have_set_event_pid && have_set_event_pid) { instance->have_set_event_pid = 1; instance_reset_file_save(instance, "set_event_pid", RESET_DEFAULT_PRIO); } if (!instance->have_event_fork && have_event_fork) { instance->have_event_fork = 1; instance_reset_file_save(instance, "options/event-fork", RESET_DEFAULT_PRIO); } if (!instance->have_func_fork && have_func_fork) { instance->have_func_fork = 1; instance_reset_file_save(instance, "options/function-fork", RESET_DEFAULT_PRIO); } } /** * allocate_instance - 
allocate a new buffer instance, * it must exist in the ftrace system * @name: The name of the instance (instance will point to this) * * Returns a newly allocated instance. In case of an error or if the * instance does not exist in the ftrace system, NULL is returned. */ struct buffer_instance *allocate_instance(const char *name) { struct buffer_instance *instance; instance = calloc(1, sizeof(*instance)); if (!instance) return NULL; if (name) instance->name = strdup(name); if (tracefs_instance_exists(name)) { instance->tracefs = tracefs_instance_create(name); if (!instance->tracefs) goto error; } return instance; error: free(instance->name); tracefs_instance_free(instance->tracefs); free(instance); return NULL; } static int __add_all_instances(const char *tracing_dir) { struct dirent *dent; char *instances_dir; struct stat st; DIR *dir; int ret; if (!tracing_dir) return -1; instances_dir = append_file(tracing_dir, "instances"); if (!instances_dir) return -1; ret = stat(instances_dir, &st); if (ret < 0 || !S_ISDIR(st.st_mode)) { ret = -1; goto out_free; } dir = opendir(instances_dir); if (!dir) { ret = -1; goto out_free; } while ((dent = readdir(dir))) { const char *name = dent->d_name; char *instance_path; struct buffer_instance *instance; if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0) continue; instance_path = append_file(instances_dir, name); ret = stat(instance_path, &st); if (ret < 0 || !S_ISDIR(st.st_mode)) { free(instance_path); continue; } free(instance_path); instance = allocate_instance(name); if (!instance) die("Failed to create instance"); add_instance(instance, local_cpu_count); } closedir(dir); ret = 0; out_free: free(instances_dir); return ret; } /** * add_all_instances - Add all pre-existing instances to the internal list * @tracing_dir: The top-level tracing directory * * Returns whether the operation succeeded */ void add_all_instances(void) { const char *tracing_dir = tracefs_tracing_dir(); if (!tracing_dir) die("can't get the tracing 
directory"); __add_all_instances(tracing_dir); } /** * tracecmd_stat_cpu - show the buffer stats of a particular CPU * @s: the trace_seq to record the data in. * @cpu: the CPU to stat * */ void tracecmd_stat_cpu_instance(struct buffer_instance *instance, struct trace_seq *s, int cpu) { char buf[BUFSIZ]; char *path; char *file; int fd; int r; file = malloc(40); if (!file) return; snprintf(file, 40, "per_cpu/cpu%d/stats", cpu); path = tracefs_instance_get_file(instance->tracefs, file); free(file); fd = open(path, O_RDONLY); tracefs_put_tracing_file(path); if (fd < 0) return; while ((r = read(fd, buf, BUFSIZ)) > 0) trace_seq_printf(s, "%.*s", r, buf); close(fd); } /** * tracecmd_stat_cpu - show the buffer stats of a particular CPU * @s: the trace_seq to record the data in. * @cpu: the CPU to stat * */ void tracecmd_stat_cpu(struct trace_seq *s, int cpu) { tracecmd_stat_cpu_instance(&top_instance, s, cpu); } static void add_event(struct buffer_instance *instance, struct event_list *event) { *instance->event_next = event; instance->event_next = &event->next; event->next = NULL; } static void reset_event_list(struct buffer_instance *instance) { instance->events = NULL; init_instance(instance); } static char *get_temp_file(struct buffer_instance *instance, int cpu) { const char *output_file = instance->output_file; const char *name; char *file = NULL; int size; if (instance->temp_dir) { if (!instance->temp_file) { const char *f = output_file + strlen(output_file) - 1;; int ret; for (; f > output_file && *f != '/'; f--) ; if (*f == '/') f++; ret = asprintf(&instance->temp_file, "%s/%s", instance->temp_dir, f); if (ret < 0) die("Failed to create temp file"); } output_file = instance->temp_file; } name = tracefs_instance_get_name(instance->tracefs); if (name) { size = snprintf(file, 0, "%s.%s.cpu%d", output_file, name, cpu); file = malloc(size + 1); if (!file) die("Failed to allocate temp file for %s", name); sprintf(file, "%s.%s.cpu%d", output_file, name, cpu); } else { 
size = snprintf(file, 0, "%s.cpu%d", output_file, cpu); file = malloc(size + 1); if (!file) die("Failed to allocate temp file"); sprintf(file, "%s.cpu%d", output_file, cpu); } return file; } char *trace_get_guest_file(const char *file, const char *guest) { const char *p; char *out = NULL; int ret, base_len; p = strrchr(file, '.'); if (p && p != file) base_len = p - file; else base_len = strlen(file); ret = asprintf(&out, "%.*s-%s%s", base_len, file, guest, file + base_len); if (ret < 0) return NULL; return out; } static void put_temp_file(char *file) { free(file); } static void delete_temp_file(struct buffer_instance *instance, int cpu) { const char *output_file; const char *name; char file[PATH_MAX]; if (instance->temp_file) output_file = instance->temp_file; else output_file = instance->output_file; name = tracefs_instance_get_name(instance->tracefs); if (name) snprintf(file, PATH_MAX, "%s.%s.cpu%d", output_file, name, cpu); else snprintf(file, PATH_MAX, "%s.cpu%d", output_file, cpu); unlink(file); } static int kill_thread_instance(int start, struct buffer_instance *instance) { int n = start; int i; for (i = 0; i < instance->cpu_count; i++) { if (pids[n].pid > 0) { kill(pids[n].pid, SIGKILL); delete_temp_file(instance, i); pids[n].pid = 0; if (pids[n].brass[0] >= 0) close(pids[n].brass[0]); } n++; } return n; } static void kill_threads(void) { struct buffer_instance *instance; int i = 0; if (!recorder_threads || !pids) return; for_all_instances(instance) i = kill_thread_instance(i, instance); } void die(const char *fmt, ...) 
{ va_list ap; int ret = errno; if (errno) perror("trace-cmd"); else ret = -1; if (created_pidfile) remove_pid_file(RECORD_PIDFILE); kill_threads(); va_start(ap, fmt); fprintf(stderr, " "); vfprintf(stderr, fmt, ap); va_end(ap); fprintf(stderr, "\n"); exit(ret); } static int delete_thread_instance(int start, struct buffer_instance *instance) { int n = start; int i; for (i = 0; i < instance->cpu_count; i++) { if (pids) { if (pids[n].pid) { delete_temp_file(instance, i); if (pids[n].pid < 0) pids[n].pid = 0; } n++; } else /* Extract does not allocate pids */ delete_temp_file(instance, i); } return n; } static void delete_thread_data(void) { struct buffer_instance *instance; int i = 0; for_all_instances(instance) i = delete_thread_instance(i, instance); /* * Top instance temp files are still created even if it * isn't used. */ if (no_top_instance()) { for (i = 0; i < local_cpu_count; i++) delete_temp_file(&top_instance, i); } } static void add_tsc2nsec(struct tracecmd_output *handle, struct tsc_nsec *tsc2nsec) { /* multiplier, shift, offset */ struct iovec vector[3]; vector[0].iov_len = 4; vector[0].iov_base = &tsc2nsec->mult; vector[1].iov_len = 4; vector[1].iov_base = &tsc2nsec->shift; vector[2].iov_len = 8; vector[2].iov_base = &tsc2nsec->offset; tracecmd_add_option_v(handle, TRACECMD_OPTION_TSC2NSEC, vector, 3); } static void guest_tsync_complete(struct buffer_instance *instance) { tracecmd_tsync_with_host_stop(instance->tsync); tracecmd_tsync_free(instance->tsync); } static void host_tsync_complete(struct common_record_context *ctx, struct buffer_instance *instance) { struct tracecmd_output *handle = NULL; int fd = -1; int ret; ret = tracecmd_tsync_with_guest_stop(instance->tsync); if (!ret) { fd = open(instance->output_file, O_RDWR); if (fd < 0) die("error opening %s", instance->output_file); handle = tracecmd_get_output_handle_fd(fd); if (!handle) die("cannot create output handle"); if (ctx->tsc2nsec.mult) add_tsc2nsec(handle, &ctx->tsc2nsec); 
tracecmd_write_guest_time_shift(handle, instance->tsync); tracecmd_append_options(handle); tracecmd_output_close(handle); } tracecmd_tsync_free(instance->tsync); instance->tsync = NULL; } static void tell_guests_to_stop(struct common_record_context *ctx) { struct buffer_instance *instance; /* Send close message to guests */ for_all_instances(instance) { if (is_guest(instance)) { tracecmd_msg_send_close_msg(instance->msg_handle); if (is_proxy(instance) && instance->proxy_fd >= 0) { /* The proxy will send more data now */ if (tracecmd_msg_read_data(instance->msg_handle, instance->proxy_fd)) warning("Failed receiving finishing metadata"); close(instance->proxy_fd); } } } for_all_instances(instance) { if (is_guest(instance)) { if (is_proxy(instance)) guest_tsync_complete(instance); else host_tsync_complete(ctx, instance); } } /* Wait for guests to acknowledge */ for_all_instances(instance) { if (is_guest(instance)) { if (!is_proxy(instance)) { tracecmd_msg_wait_close_resp(instance->msg_handle); tracecmd_msg_handle_close(instance->msg_handle); } } } } static void stop_threads(enum trace_type type) { int ret; int i; if (!recorder_threads) return; /* Tell all threads to finish up */ for (i = 0; i < recorder_threads; i++) { if (pids[i].pid > 0) { kill(pids[i].pid, SIGUSR1); } } /* Flush out the pipes */ if (type & TRACE_TYPE_STREAM) { do { ret = trace_stream_read(pids, recorder_threads, 0); } while (ret > 0); } } static void wait_threads() { int i; for (i = 0; i < recorder_threads; i++) { if (pids[i].pid > 0) { waitpid(pids[i].pid, NULL, 0); pids[i].pid = -1; } } } static int create_recorder(struct buffer_instance *instance, int cpu, enum trace_type type, int *brass); static void flush_threads(void) { struct buffer_instance *instance; long ret; int i; for_all_instances(instance) { for (i = 0; i < instance->cpu_count; i++) { /* Extract doesn't support sub buffers yet */ ret = create_recorder(instance, i, TRACE_TYPE_EXTRACT, NULL); if (ret < 0) die("error reading ring 
buffer"); } } } static int set_ftrace_enable(const char *path, int set) { struct stat st; int fd; char *val = set ? "1" : "0"; int ret; /* if ftace_enable does not exist, simply ignore it */ fd = stat(path, &st); if (fd < 0) return -ENODEV; reset_save_file(path, RESET_DEFAULT_PRIO); ret = -1; fd = open(path, O_WRONLY); if (fd < 0) goto out; /* Now set or clear the function option */ ret = write(fd, val, 1); close(fd); out: return ret < 0 ? ret : 0; } static int set_ftrace_proc(int set) { const char *path = "/proc/sys/kernel/ftrace_enabled"; int ret; ret = set_ftrace_enable(path, set); if (ret == -1) die ("Can't %s ftrace", set ? "enable" : "disable"); return ret; } static int set_ftrace(struct buffer_instance *instance, int set, int use_proc) { char *path; int ret; path = tracefs_instance_get_file(instance->tracefs, "options/function-trace"); if (!path) return -1; ret = set_ftrace_enable(path, set); tracefs_put_tracing_file(path); /* Always enable ftrace_enable proc file when set is true */ if (ret < 0 || set || use_proc) ret = set_ftrace_proc(set); return ret; } static int write_file(const char *file, const char *str) { int ret; int fd; fd = open(file, O_WRONLY | O_TRUNC); if (fd < 0) die("opening to '%s'", file); ret = write(fd, str, strlen(str)); close(fd); return ret; } static void __clear_trace(struct buffer_instance *instance) { FILE *fp; char *path; if (is_guest(instance)) return; /* reset the trace */ path = tracefs_instance_get_file(instance->tracefs, "trace"); fp = fopen(path, "w"); if (!fp) die("writing to '%s'", path); tracefs_put_tracing_file(path); fwrite("0", 1, 1, fp); fclose(fp); } static void clear_trace_instances(void) { struct buffer_instance *instance; for_all_instances(instance) __clear_trace(instance); } static void reset_max_latency(struct buffer_instance *instance) { tracefs_instance_file_write(instance->tracefs, "tracing_max_latency", "0"); } static int add_filter_pid(struct buffer_instance *instance, int pid, int exclude) { struct 
filter_pids *p; char buf[100]; for (p = instance->filter_pids; p; p = p->next) { if (p->pid == pid) { p->exclude = exclude; return 0; } } p = malloc(sizeof(*p)); if (!p) die("Failed to allocate pid filter"); p->next = instance->filter_pids; p->exclude = exclude; p->pid = pid; instance->filter_pids = p; instance->nr_filter_pids++; instance->len_filter_pids += sprintf(buf, "%d", pid); return 1; } static void add_filter_pid_all(int pid, int exclude) { struct buffer_instance *instance; for_all_instances(instance) add_filter_pid(instance, pid, exclude); } static void reset_save_ftrace_pid(struct buffer_instance *instance) { static char *path; if (!tracefs_file_exists(instance->tracefs, "set_ftrace_pid")) return; path = tracefs_instance_get_file(instance->tracefs, "set_ftrace_pid"); if (!path) return; reset_save_file_cond(path, RESET_DEFAULT_PRIO, "no pid", ""); tracefs_put_tracing_file(path); } static void update_ftrace_pid(struct buffer_instance *instance, const char *pid, int reset) { int fd = -1; char *path; int ret; if (!tracefs_file_exists(instance->tracefs, "set_ftrace_pid")) return; path = tracefs_instance_get_file(instance->tracefs, "set_ftrace_pid"); if (!path) return; fd = open(path, O_WRONLY | O_CLOEXEC | (reset ? 
O_TRUNC : 0)); tracefs_put_tracing_file(path); if (fd < 0) return; ret = write(fd, pid, strlen(pid)); /* * Older kernels required "-1" to disable pid */ if (ret < 0 && !strlen(pid)) ret = write(fd, "-1", 2); if (ret < 0) die("error writing to %s", path); /* add whitespace in case another pid is written */ write(fd, " ", 1); close(fd); } static void update_ftrace_pids(int reset) { struct buffer_instance *instance; struct filter_pids *pid; static int first = 1; char buf[100]; int rst; for_all_instances(instance) { if (first) reset_save_ftrace_pid(instance); rst = reset; for (pid = instance->filter_pids; pid; pid = pid->next) { if (pid->exclude) continue; snprintf(buf, 100, "%d ", pid->pid); update_ftrace_pid(instance, buf, rst); /* Only reset the first entry */ rst = 0; } } if (first) first = 0; } static void update_event_filters(struct buffer_instance *instance); static void update_pid_event_filters(struct buffer_instance *instance); static void append_filter_pid_range(char **filter, int *curr_len, const char *field, int start_pid, int end_pid, bool exclude) { const char *op = "", *op1, *op2, *op3; int len; if (*filter && **filter) op = exclude ? "&&" : "||"; /* Handle thus case explicitly so that we get `pid==3` instead of * `pid>=3&&pid<=3` for singleton ranges */ if (start_pid == end_pid) { #define FMT "%s(%s%s%d)" len = snprintf(NULL, 0, FMT, op, field, exclude ? "!=" : "==", start_pid); *filter = realloc(*filter, *curr_len + len + 1); if (!*filter) die("realloc"); len = snprintf(*filter + *curr_len, len + 1, FMT, op, field, exclude ? 
"!=" : "==", start_pid); *curr_len += len; return; #undef FMT } if (exclude) { op1 = "<"; op2 = "||"; op3 = ">"; } else { op1 = ">="; op2 = "&&"; op3 = "<="; } #define FMT "%s(%s%s%d%s%s%s%d)" len = snprintf(NULL, 0, FMT, op, field, op1, start_pid, op2, field, op3, end_pid); *filter = realloc(*filter, *curr_len + len + 1); if (!*filter) die("realloc"); len = snprintf(*filter + *curr_len, len + 1, FMT, op, field, op1, start_pid, op2, field, op3, end_pid); *curr_len += len; } /** * make_pid_filter - create a filter string to all pids against @field * @curr_filter: Append to a previous filter (may realloc). Can be NULL * @field: The field to compare the pids against * * Creates a new string or appends to an existing one if @curr_filter * is not NULL. The new string will contain a filter with all pids * in pid_filter list with the format (@field == pid) || .. * If @curr_filter is not NULL, it will add this string as: * (@curr_filter) && ((@field == pid) || ...) */ static char *make_pid_filter(struct buffer_instance *instance, char *curr_filter, const char *field) { int start_pid = -1, last_pid = -1; int last_exclude = -1; struct filter_pids *p; char *filter = NULL; int curr_len = 0; /* Use the new method if possible */ if (instance->have_set_event_pid) return NULL; if (!instance->filter_pids) return curr_filter; for (p = instance->filter_pids; p; p = p->next) { /* * PIDs are inserted in `filter_pids` from the front and that's * why we expect them in descending order here. 
*/ if (p->pid == last_pid - 1 && p->exclude == last_exclude) { last_pid = p->pid; continue; } if (start_pid != -1) append_filter_pid_range(&filter, &curr_len, field, last_pid, start_pid, last_exclude); start_pid = last_pid = p->pid; last_exclude = p->exclude; } append_filter_pid_range(&filter, &curr_len, field, last_pid, start_pid, last_exclude); if (curr_filter) { char *save = filter; asprintf(&filter, "(%s)&&(%s)", curr_filter, filter); free(save); } return filter; } #define _STRINGIFY(x) #x #define STRINGIFY(x) _STRINGIFY(x) static int get_pid_addr_maps(struct buffer_instance *instance, int pid) { struct pid_addr_maps *maps = instance->pid_maps; struct tracecmd_proc_addr_map *map; unsigned long long begin, end; struct pid_addr_maps *m; char mapname[PATH_MAX+1]; char fname[PATH_MAX+1]; char buf[PATH_MAX+100]; FILE *f; int ret; int res; int i; sprintf(fname, "/proc/%d/exe", pid); ret = readlink(fname, mapname, PATH_MAX); if (ret >= PATH_MAX || ret < 0) return -ENOENT; mapname[ret] = 0; sprintf(fname, "/proc/%d/maps", pid); f = fopen(fname, "r"); if (!f) return -ENOENT; while (maps) { if (pid == maps->pid) break; maps = maps->next; } ret = -ENOMEM; if (!maps) { maps = calloc(1, sizeof(*maps)); if (!maps) goto out_fail; maps->pid = pid; maps->next = instance->pid_maps; instance->pid_maps = maps; } else { for (i = 0; i < maps->nr_lib_maps; i++) free(maps->lib_maps[i].lib_name); free(maps->lib_maps); maps->lib_maps = NULL; maps->nr_lib_maps = 0; free(maps->proc_name); } maps->proc_name = strdup(mapname); if (!maps->proc_name) goto out; while (fgets(buf, sizeof(buf), f)) { mapname[0] = '\0'; res = sscanf(buf, "%llx-%llx %*s %*x %*s %*d %"STRINGIFY(PATH_MAX)"s", &begin, &end, mapname); if (res == 3 && mapname[0] != '\0') { map = realloc(maps->lib_maps, (maps->nr_lib_maps + 1) * sizeof(*map)); if (!map) goto out_fail; maps->lib_maps = map; map[maps->nr_lib_maps].end = end; map[maps->nr_lib_maps].start = begin; map[maps->nr_lib_maps].lib_name = strdup(mapname); if 
(!map[maps->nr_lib_maps].lib_name)
				goto out_fail;
			maps->nr_lib_maps++;
		}
	}
out:
	fclose(f);
	return 0;

out_fail:
	fclose(f);
	if (maps) {
		for (i = 0; i < maps->nr_lib_maps; i++)
			free(maps->lib_maps[i].lib_name);
		/* Unlink @maps from the instance list before freeing it */
		if (instance->pid_maps != maps) {
			m = instance->pid_maps;
			while (m) {
				if (m->next == maps) {
					m->next = maps->next;
					break;
				}
				m = m->next;
			}
		} else
			instance->pid_maps = maps->next;
		free(maps->lib_maps);
		maps->lib_maps = NULL;
		maps->nr_lib_maps = 0;
		free(maps->proc_name);
		maps->proc_name = NULL;
		free(maps);
	}
	return ret;
}

/*
 * For every instance that has get_procmap set, collect the address
 * maps of each non-excluded filter pid. Failures are ignored.
 */
static void get_filter_pid_maps(void)
{
	struct buffer_instance *instance;
	struct filter_pids *p;

	for_all_instances(instance) {
		if (!instance->get_procmap)
			continue;
		for (p = instance->filter_pids; p; p = p->next) {
			if (p->exclude)
				continue;
			get_pid_addr_maps(instance, p->pid);
		}
	}
}

/*
 * Apply the task filters: when filter_task is set, the calling process
 * is added to every instance's pid list; then the common_pid filter
 * string of each instance that has filter pids is rebuilt, and the
 * ftrace pid files and pid event filters are updated.
 * Does nothing when no_filter is set.
 */
static void update_task_filter(void)
{
	struct buffer_instance *instance;
	int pid = getpid();

	if (no_filter)
		return;

	get_filter_pid_maps();

	if (filter_task)
		add_filter_pid_all(pid, 0);

	for_all_instances(instance) {
		if (!instance->filter_pids)
			continue;
		if (instance->common_pid_filter)
			free(instance->common_pid_filter);
		instance->common_pid_filter = make_pid_filter(instance, NULL,
							      "common_pid");
	}
	update_ftrace_pids(1);
	for_all_instances(instance)
		update_pid_event_filters(instance);
}

/*
 * waitpid() wrapper. When streaming, the wait is done with WNOHANG and
 * trace_stream_read() is called between polls, so the wait does not
 * block while data is being streamed.
 *
 * Returns the first non-zero result of waitpid().
 */
static pid_t trace_waitpid(enum trace_type type, pid_t pid, int *status, int options)
{
	int ret;

	if (type & TRACE_TYPE_STREAM)
		options |= WNOHANG;

	do {
		ret = waitpid(pid, status, options);
		if (ret != 0)
			return ret;

		if (type & TRACE_TYPE_STREAM)
			trace_stream_read(pids, recorder_threads, sleep_time);
	} while (1);
}

/* Fallback syscall number for headers that do not define it */
#ifndef __NR_pidfd_open
#define __NR_pidfd_open 434
#endif

/* Thin wrapper around the pidfd_open system call */
static int pidfd_open(pid_t pid, unsigned int flags) {
	return syscall(__NR_pidfd_open, pid, flags);
}

/*
 * Block in poll() on @pidfd until it is readable or "finished" is set.
 *
 * Returns 1 when poll() fails with anything other than EINTR, 0
 * otherwise.
 */
static int trace_waitpidfd(id_t pidfd) {
	struct pollfd pollfd;

	pollfd.fd = pidfd;
	pollfd.events = POLLIN;

	while (!finished) {
		int ret = poll(&pollfd, 1, -1);
		/* If poll() was interrupted by a signal, keep waiting */
		if (ret < 0 &&
errno == EINTR)
			continue;
		else if (ret < 0)
			return 1;
		else
			break;
	}

	return 0;
}

/*
 * Wait for the processes listed in @instance->process_pids.
 *
 * A pidfd is opened for each non-excluded pid; pids that no longer
 * exist (ESRCH) are skipped. instance->nr_process_pids is decremented
 * for every list entry visited. Each pidfd is then waited on in turn.
 *
 * Returns 0 on success and 1 on allocation, pidfd_open() or wait
 * failure. All opened pidfds are closed before returning.
 */
static int trace_wait_for_processes(struct buffer_instance *instance) {
	int ret = 0;
	int nr_fds = 0;
	int i;
	int *pidfds;
	struct filter_pids *pid;

	pidfds = malloc(sizeof(int) * instance->nr_process_pids);
	if (!pidfds)
		return 1;

	for (pid = instance->process_pids;
	     pid && instance->nr_process_pids;
	     pid = pid->next) {
		if (pid->exclude) {
			instance->nr_process_pids--;
			continue;
		}
		pidfds[nr_fds] = pidfd_open(pid->pid, 0);

		/* If the pid doesn't exist, the process has probably exited */
		if (pidfds[nr_fds] < 0 && errno == ESRCH) {
			instance->nr_process_pids--;
			continue;
		} else if (pidfds[nr_fds] < 0) {
			ret = 1;
			goto out;
		}

		nr_fds++;
		instance->nr_process_pids--;
	}

	for (i = 0; i < nr_fds; i++) {
		if (trace_waitpidfd(pidfds[i])) {
			ret = 1;
			goto out;
		}
	}

out:
	for (i = 0; i < nr_fds; i++)
		close(pidfds[i]);
	free(pidfds);
	return ret;
}

/* Write @buf to the "set_event_pid" file of @instance */
static void add_event_pid(struct buffer_instance *instance, const char *buf)
{
	tracefs_instance_file_write(instance->tracefs, "set_event_pid", buf);
}

#ifndef NO_PTRACE
/**
 * append_pid_filter - add a new pid to an existing filter
 * @curr_filter: the filter to append to. If NULL, then allocate one
 * @field: The field to compare the pid to
 * @pid: The pid to add to.
*/ static char *append_pid_filter(char *curr_filter, const char *field, int pid) { char *filter; int len; len = snprintf(NULL, 0, "(%s==%d)||", field, pid); if (!curr_filter) { /* No need for +1 as we don't use the "||" */ filter = malloc(len); if (!filter) die("Failed to allocate pid filter"); sprintf(filter, "(%s==%d)", field, pid); } else { int indx = strlen(curr_filter); len += indx; filter = realloc(curr_filter, len + indx + 1); if (!filter) die("realloc"); sprintf(filter + indx, "||(%s==%d)", field, pid); } return filter; } static void append_sched_event(struct event_list *event, const char *field, int pid) { if (!event || !event->pid_filter) return; event->pid_filter = append_pid_filter(event->pid_filter, field, pid); } static void update_sched_events(struct buffer_instance *instance, int pid) { /* * Also make sure that the sched_switch to this pid * and wakeups of this pid are also traced. * Only need to do this if the events are active. */ append_sched_event(instance->sched_switch_event, "next_pid", pid); append_sched_event(instance->sched_wakeup_event, "pid", pid); append_sched_event(instance->sched_wakeup_new_event, "pid", pid); } static int open_instance_fd(struct buffer_instance *instance, const char *file, int flags); static void add_new_filter_child_pid(int pid, int child) { struct buffer_instance *instance; struct filter_pids *fpid; char buf[100]; for_all_instances(instance) { if (!instance->ptrace_child || !instance->filter_pids) continue; for (fpid = instance->filter_pids; fpid; fpid = fpid->next) { if (fpid->pid == pid) break; } if (!fpid) continue; add_filter_pid(instance, child, 0); sprintf(buf, "%d", child); update_ftrace_pid(instance, buf, 0); instance->common_pid_filter = append_pid_filter(instance->common_pid_filter, "common_pid", pid); if (instance->have_set_event_pid) { add_event_pid(instance, buf); } else { update_sched_events(instance, pid); update_event_filters(instance); } } } static void ptrace_attach(struct buffer_instance 
*instance, int pid) { int ret; ret = ptrace(PTRACE_ATTACH, pid, NULL, 0); if (ret < 0) { warning("Unable to trace process %d children", pid); do_ptrace = 0; return; } if (instance) add_filter_pid(instance, pid, 0); else add_filter_pid_all(pid, 0); } static void enable_ptrace(void) { if (!do_ptrace || !filter_task) return; ptrace(PTRACE_TRACEME, 0, NULL, 0); } static struct buffer_instance *get_intance_fpid(int pid) { struct buffer_instance *instance; struct filter_pids *fpid; for_all_instances(instance) { for (fpid = instance->filter_pids; fpid; fpid = fpid->next) { if (fpid->exclude) continue; if (fpid->pid == pid) break; } if (fpid) return instance; } return NULL; } static void ptrace_wait(enum trace_type type) { struct buffer_instance *instance; struct filter_pids *fpid; unsigned long send_sig; unsigned long child; int nr_pids = 0; siginfo_t sig; int main_pids; int cstatus; int status; int i = 0; int *pids; int event; int pid; int ret; for_all_instances(instance) nr_pids += instance->nr_filter_pids; pids = calloc(nr_pids, sizeof(int)); if (!pids) { warning("Unable to allocate array for %d PIDs", nr_pids); return; } for_all_instances(instance) { if (!instance->ptrace_child && !instance->get_procmap) continue; for (fpid = instance->filter_pids; fpid && i < nr_pids; fpid = fpid->next) { if (fpid->exclude) continue; pids[i++] = fpid->pid; } } main_pids = i; do { ret = trace_waitpid(type, -1, &status, WSTOPPED | __WALL); if (ret < 0) continue; pid = ret; if (WIFSTOPPED(status)) { event = (status >> 16) & 0xff; ptrace(PTRACE_GETSIGINFO, pid, NULL, &sig); send_sig = sig.si_signo; /* Don't send ptrace sigs to child */ if (send_sig == SIGTRAP || send_sig == SIGSTOP) send_sig = 0; switch (event) { case PTRACE_EVENT_FORK: case PTRACE_EVENT_VFORK: case PTRACE_EVENT_CLONE: /* forked a child */ ptrace(PTRACE_GETEVENTMSG, pid, NULL, &child); ptrace(PTRACE_SETOPTIONS, child, NULL, PTRACE_O_TRACEFORK | PTRACE_O_TRACEVFORK | PTRACE_O_TRACECLONE | PTRACE_O_TRACEEXIT); 
add_new_filter_child_pid(pid, child); ptrace(PTRACE_CONT, child, NULL, 0); break; case PTRACE_EVENT_EXIT: instance = get_intance_fpid(pid); if (instance && instance->get_procmap) get_pid_addr_maps(instance, pid); ptrace(PTRACE_GETEVENTMSG, pid, NULL, &cstatus); ptrace(PTRACE_DETACH, pid, NULL, NULL); break; } ptrace(PTRACE_SETOPTIONS, pid, NULL, PTRACE_O_TRACEFORK | PTRACE_O_TRACEVFORK | PTRACE_O_TRACECLONE | PTRACE_O_TRACEEXIT); ptrace(PTRACE_CONT, pid, NULL, send_sig); } if (WIFEXITED(status) || (WIFSTOPPED(status) && event == PTRACE_EVENT_EXIT)) { for (i = 0; i < nr_pids; i++) { if (pid == pids[i]) { pids[i] = 0; main_pids--; if (!main_pids) finished = 1; } } } } while (!finished && ret > 0); free(pids); } #else static inline void ptrace_wait(enum trace_type type) { } static inline void enable_ptrace(void) { } static inline void ptrace_attach(struct buffer_instance *instance, int pid) { } #endif /* NO_PTRACE */ static bool child_detached; static void daemonize_set_child_detached(int s) { child_detached = true; } static void daemonize_start(void) { int devnull; int status; int pid; int ret; pid = fork(); if (pid == -1) die("daemonize: fork failed"); if (pid == 0) { /* child */ /* * We keep stdout and stderr open to allow the user to * see output and errors after the daemonization (the user can * choose to supress it with >/dev/null if the user wants). * * No reason to keep stdin open (it might interfere with the * shell), we redirect it to /dev/null. */ devnull = open("/dev/null", O_RDONLY); if (devnull == -1) die("daemonize: open /dev/null failed"); if (devnull > 0) { if (dup2(devnull, 0) == -1) die("daemonize: dup2"); close(0); } return; /* * The child returns to back to the caller, but the parent waits until * SIGRTMIN is received from the child (by calling daemonize_finish()), * or the child exits for some reason (usually an indication of * an error), which ever comes first. 
* * Then the parent exits (with the status code of the child, * if it finished early, or with 0 if SIGRTMIN was received), * which causes the child (and its entire process tree) to be * inherited by init. * * Note that until the child calls daemonize_finish(), it still has * the same session id as the parent, so it can die together with * the parent before daemonization finished (purposefully, since the * user might send a quick Ctrl^C to cancel the command, and we don't * want background processes staying alive in that case) */ } else { /* parent */ struct sigaction sa = { /* disable SA_RESTART, to allow waitpid() to be interrupted by SIGRTMIN */ .sa_flags = 0, .sa_handler = daemonize_set_child_detached }; if (sigemptyset(&sa.sa_mask) == -1) die("daemonize: sigemptyset failed"); if (sigaddset(&sa.sa_mask, SIGRTMIN) == -1) die("daemonize: sigaddset failed"); if (sigprocmask(SIG_UNBLOCK, &sa.sa_mask, NULL) == -1) die("daemonize: sigprocmask failed"); if (sigaction(SIGRTMIN, &sa, NULL) == -1) die("daemonize: sigaction failed"); do { ret = waitpid(pid, &status, 0); } while (!child_detached && ((ret < 0) && (errno == EINTR))); if (child_detached) exit(0); else if (ret == pid) exit(WIFEXITED(status)); else die("daemonize: waitpid failed"); __builtin_unreachable(); } } static void daemonize_finish(void) { /* * setsid() will also set the sid to be the pgid to all currently * running threads in the process group (such as the tsync thread). 
*/
	if (setsid() == -1)
		die("daemonize: setsid");

	if (kill(getppid(), SIGRTMIN) == -1)
		die("daemonize: kill");

	make_pid_file(RECORD_PIDFILE);
	created_pidfile = true;
}

/*
 * One iteration of the main wait loop.
 *
 * @pwait: wait on the traced processes with ptrace_wait().
 * Otherwise, when streaming, read from the recorders and, if nothing
 * was read, send SIGUSR2 to every live recorder. In all other cases
 * sleep for 10 seconds.
 */
static void trace_or_sleep(enum trace_type type, bool pwait)
{
	int i;

	if (pwait)
		ptrace_wait(type);
	else if (type & TRACE_TYPE_STREAM) {
		/* Returns zero if it did not read anything (and did a sleep) */
		if (trace_stream_read(pids, recorder_threads, sleep_time) > 0)
			return;
		/* Force a flush if nothing was read (including on errors) */
		for (i = 0; i < recorder_threads; i++) {
			if (pids[i].pid > 0) {
				kill(pids[i].pid, SIGUSR2);
			}
		}
	} else
		sleep(10);
}

/*
 * Switch the process to @user: supplementary groups, gid and uid are
 * set (in that order) and HOME, USER and LOGNAME are updated from the
 * password entry.
 *
 * Returns 0 on success or when @user is NULL, -1 as soon as any step
 * fails.
 */
static int change_user(const char *user)
{
	struct passwd *pwd;

	if (!user)
		return 0;

	pwd = getpwnam(user);
	if (!pwd)
		return -1;
	if (initgroups(user, pwd->pw_gid) < 0)
		return -1;
	if (setgid(pwd->pw_gid) < 0)
		return -1;
	if (setuid(pwd->pw_uid) < 0)
		return -1;

	if (setenv("HOME", pwd->pw_dir, 1) < 0)
		return -1;
	if (setenv("USER", pwd->pw_name, 1) < 0)
		return -1;
	if (setenv("LOGNAME", pwd->pw_name, 1) < 0)
		return -1;

	return 0;
}

/*
 * Enable tracing and exec the command in @argv. A command name without
 * a '/' is looked up in the directories of $PATH.
 */
static void execute_program(int argc, char **argv)
{
	char buf[PATH_MAX + NAME_MAX + 1];
	char *path;
	char *entry;
	char *saveptr;

	/*
	 * if command specified by user is neither absolute nor
	 * relative than we search for it in $PATH.
	 */
	if (!strchr(argv[0], '/')) {
		path = getenv("PATH");

		if (!path)
			die("can't search for '%s' if $PATH is NULL", argv[0]);

		/* Do not modify the actual environment variable */
		path = strdup(path);
		if (!path)
			die("Failed to allocate PATH");

		for (entry = strtok_r(path, ":", &saveptr);
		     entry; entry = strtok_r(NULL, ":", &saveptr)) {

			snprintf(buf, sizeof(buf), "%s/%s", entry, argv[0]);

			/* does it exist and can we execute it?
*/ if (access(buf, X_OK) == 0) break; } free(path); } else { strncpy(buf, argv[0], sizeof(buf)); } tracecmd_enable_tracing(); if (execve(buf, argv, environ)) { fprintf(stderr, "\n********************\n"); fprintf(stderr, " Unable to exec %s\n", argv[0]); fprintf(stderr, "********************\n"); die("Failed to exec %s", argv[0]); } } static void run_cmd(enum trace_type type, const char *user, int argc, char **argv) { int status; int pid; if ((pid = fork()) < 0) die("failed to fork"); if (!pid) { /* child */ update_task_filter(); if (!fork_process) enable_ptrace(); /* * If we are using stderr for stdout, switch * it back to the saved stdout for the code we run. */ if (save_stdout >= 0) { close(1); dup2(save_stdout, 1); close(save_stdout); } if (change_user(user) < 0) die("Failed to change user to %s", user); execute_program(argc, argv); } if (do_daemonize) daemonize_finish(); if (fork_process) exit(0); if (do_ptrace) { ptrace_attach(NULL, pid); ptrace_wait(type); } else trace_waitpid(type, pid, &status, 0); if (type & (TRACE_TYPE_START | TRACE_TYPE_SET)) exit(0); } static void set_plugin_instance(struct buffer_instance *instance, const char *name) { char *path; char zero = '0'; int ret; int fd; if (is_guest(instance)) return; path = tracefs_instance_get_file(instance->tracefs, "current_tracer"); fd = open(path, O_WRONLY); if (fd < 0) { /* * Legacy kernels do not have current_tracer file, and they * always use nop. So, it doesn't need to try to change the * plugin for those if name is "nop". 
*/
		if (!strncmp(name, "nop", 3)) {
			tracefs_put_tracing_file(path);
			return;
		}
		die("Opening '%s'", path);
	}
	ret = write(fd, name, strlen(name));
	close(fd);

	if (ret < 0)
		die("writing to '%s'", path);

	tracefs_put_tracing_file(path);

	/* Only tracers whose name starts with "function" need the rest */
	if (strncmp(name, "function", 8) != 0)
		return;

	/* Make sure func_stack_trace option is disabled */
	/* First try instance file, then top level */
	path = tracefs_instance_get_file(instance->tracefs, "options/func_stack_trace");
	fd = open(path, O_WRONLY);
	if (fd < 0) {
		tracefs_put_tracing_file(path);
		path = tracefs_get_tracing_file("options/func_stack_trace");
		fd = open(path, O_WRONLY);
		if (fd < 0) {
			tracefs_put_tracing_file(path);
			return;
		}
	}
	/*
	 * Always reset func_stack_trace to zero. Don't bother saving
	 * the original content.
	 */
	add_reset_file(path, "0", RESET_HIGH_PRIO);
	tracefs_put_tracing_file(path);
	write(fd, &zero, 1);
	close(fd);
}

/* Select the tracer @name in every instance */
static void set_plugin(const char *name)
{
	struct buffer_instance *instance;

	for_all_instances(instance)
		set_plugin_instance(instance, name);
}

/*
 * Queue @option at the front of the instance's option list. The string
 * is not copied; only the pointer is stored.
 */
static void save_option(struct buffer_instance *instance, const char *option)
{
	struct opt_list *opt;

	opt = malloc(sizeof(*opt));
	if (!opt)
		die("Failed to allocate option");
	opt->next = instance->options;
	instance->options = opt;
	opt->option = option;
}

/*
 * Write @option to the "trace_options" file of @instance.
 *
 * Returns 0 once the file was opened and written to, -1 (after a
 * warning) if it could not be opened.
 */
static int set_option(struct buffer_instance *instance, const char *option)
{
	FILE *fp;
	char *path;

	path = tracefs_instance_get_file(instance->tracefs, "trace_options");
	fp = fopen(path, "w");
	/* Warn while @path is still valid, bail out after releasing it */
	if (!fp)
		warning("writing to '%s'", path);
	tracefs_put_tracing_file(path);

	if (!fp)
		return -1;

	fwrite(option, 1, strlen(option), fp);
	fclose(fp);

	return 0;
}

/*
 * Set the "nofunc_stack_trace" option on @instance when its current
 * tracer reads as "function". Guest instances and instances without a
 * current_tracer file are skipped.
 */
static void disable_func_stack_trace_instance(struct buffer_instance *instance)
{
	struct stat st;
	char *content;
	char *path;
	char *cond;
	int size;
	int ret;

	if (is_guest(instance))
		return;

	path = tracefs_instance_get_file(instance->tracefs, "current_tracer");
	ret = stat(path, &st);
	tracefs_put_tracing_file(path);
	if (ret < 0)
		return;

	content =
tracefs_instance_file_read(instance->tracefs, "current_tracer", &size); cond = strstrip(content); if (memcmp(cond, "function", size - (cond - content)) !=0) goto out; set_option(instance, "nofunc_stack_trace"); out: free(content); } static void disable_func_stack_trace(void) { struct buffer_instance *instance; for_all_instances(instance) disable_func_stack_trace_instance(instance); } static void add_reset_options(struct buffer_instance *instance) { struct opt_list *opt; const char *option; char *content; char *path; char *ptr; int len; if (keep) return; path = tracefs_instance_get_file(instance->tracefs, "trace_options"); content = get_file_content(path); for (opt = instance->options; opt; opt = opt->next) { option = opt->option; len = strlen(option); ptr = content; again: ptr = strstr(ptr, option); if (ptr) { /* First make sure its the option we want */ if (ptr[len] != '\n') { ptr += len; goto again; } if (ptr - content >= 2 && strncmp(ptr - 2, "no", 2) == 0) { /* Make sure this isn't ohno-option */ if (ptr > content + 2 && *(ptr - 3) != '\n') { ptr += len; goto again; } /* we enabled it */ ptr[len] = 0; add_reset_file(path, ptr-2, RESET_DEFAULT_PRIO); ptr[len] = '\n'; continue; } /* make sure this is our option */ if (ptr > content && *(ptr - 1) != '\n') { ptr += len; goto again; } /* this option hasn't changed, ignore it */ continue; } /* ptr is NULL, not found, maybe option is a no */ if (strncmp(option, "no", 2) != 0) /* option is really not found? */ continue; option += 2; len = strlen(option); ptr = content; loop: ptr = strstr(content, option); if (!ptr) /* Really not found? 
*/
			continue;

		/* make sure this is our option */
		if (ptr[len] != '\n') {
			ptr += len;
			goto loop;
		}
		if (ptr > content && *(ptr - 1) != '\n') {
			ptr += len;
			goto loop;
		}
		/* The option is currently enabled: re-enable it on reset */
		add_reset_file(path, option, RESET_DEFAULT_PRIO);
	}
	tracefs_put_tracing_file(path);
	free(content);
}

/*
 * Apply the queued options of every instance, after registering how to
 * undo them. The option list of each instance is consumed and freed.
 * Dies if an option cannot be set.
 */
static void set_options(void)
{
	struct buffer_instance *instance;
	struct opt_list *opt;
	int ret;

	for_all_instances(instance) {
		add_reset_options(instance);
		while (instance->options) {
			opt = instance->options;
			instance->options = opt->next;
			ret = set_option(instance, opt->option);
			if (ret < 0)
				die("Failed to set ftrace option %s",
				    opt->option);
			free(opt);
		}
	}
}

/*
 * Write ctx->saved_cmdlines_size to the "saved_cmdlines_size" tracing
 * file, saving its previous content first. Nothing is done when the
 * size is zero. A warning is printed if the path, the open or the
 * write fails.
 */
static void set_saved_cmdlines_size(struct common_record_context *ctx)
{
	int fd, len, ret = -1;
	char *path, *str;

	if (!ctx->saved_cmdlines_size)
		return;

	path = tracefs_get_tracing_file("saved_cmdlines_size");
	if (!path)
		goto err;

	reset_save_file(path, RESET_DEFAULT_PRIO);

	fd = open(path, O_WRONLY);
	tracefs_put_tracing_file(path);
	if (fd < 0)
		goto err;

	len = asprintf(&str, "%d", ctx->saved_cmdlines_size);
	if (len < 0)
		die("%s couldn't allocate memory", __func__);

	if (write(fd, str, len) > 0)
		ret = 0;

	close(fd);
	free(str);
err:
	if (ret)
		warning("Couldn't set saved_cmdlines_size");
}

/*
 * Return 1 if @file can be stat()ed inside the tracing directory of
 * @instance, 0 otherwise.
 */
static int trace_check_file_exists(struct buffer_instance *instance, char *file)
{
	struct stat st;
	char *path;
	int ret;

	path = tracefs_instance_get_file(instance->tracefs, file);
	ret = stat(path, &st);
	tracefs_put_tracing_file(path);

	return ret < 0 ?
0 : 1; } static int use_old_event_method(void) { static int old_event_method; static int processed; if (processed) return old_event_method; /* Check if the kernel has the events/enable file */ if (!trace_check_file_exists(&top_instance, "events/enable")) old_event_method = 1; processed = 1; return old_event_method; } static void old_update_events(const char *name, char update) { char *path; FILE *fp; int ret; if (strcmp(name, "all") == 0) name = "*:*"; /* need to use old way */ path = tracefs_get_tracing_file("set_event"); fp = fopen(path, "w"); if (!fp) die("opening '%s'", path); tracefs_put_tracing_file(path); /* Disable the event with "!" */ if (update == '0') fwrite("!", 1, 1, fp); ret = fwrite(name, 1, strlen(name), fp); if (ret < 0) die("bad event '%s'", name); ret = fwrite("\n", 1, 1, fp); if (ret < 0) die("bad event '%s'", name); fclose(fp); return; } static void reset_events_instance(struct buffer_instance *instance) { glob_t globbuf; char *path; char c; int fd; int i; int ret; if (is_guest(instance)) return; if (use_old_event_method()) { /* old way only had top instance */ if (!is_top_instance(instance)) return; old_update_events("all", '0'); return; } c = '0'; path = tracefs_instance_get_file(instance->tracefs, "events/enable"); fd = open(path, O_WRONLY); if (fd < 0) die("opening to '%s'", path); ret = write(fd, &c, 1); close(fd); tracefs_put_tracing_file(path); path = tracefs_instance_get_file(instance->tracefs, "events/*/filter"); globbuf.gl_offs = 0; ret = glob(path, 0, NULL, &globbuf); tracefs_put_tracing_file(path); if (ret < 0) return; for (i = 0; i < globbuf.gl_pathc; i++) { path = globbuf.gl_pathv[i]; fd = open(path, O_WRONLY); if (fd < 0) die("opening to '%s'", path); ret = write(fd, &c, 1); close(fd); } globfree(&globbuf); } static void reset_events(void) { struct buffer_instance *instance; for_all_instances(instance) reset_events_instance(instance); } enum { STATE_NEWLINE, STATE_SKIP, STATE_COPY, }; static char *read_file(const char *file) { 
char stbuf[BUFSIZ]; char *buf = NULL; int size = 0; char *nbuf; int fd; int r; fd = open(file, O_RDONLY); if (fd < 0) return NULL; do { r = read(fd, stbuf, BUFSIZ); if (r <= 0) continue; nbuf = realloc(buf, size+r+1); if (!nbuf) { free(buf); buf = NULL; break; } buf = nbuf; memcpy(buf+size, stbuf, r); size += r; } while (r > 0); close(fd); if (r == 0 && size > 0) buf[size] = '\0'; return buf; } static void read_error_log(const char *log) { char *buf, *line; char *start = NULL; char *p; buf = read_file(log); if (!buf) return; line = buf; /* Only the last lines have meaning */ while ((p = strstr(line, "\n")) && p[1]) { if (line[0] != ' ') start = line; line = p + 1; } if (start) printf("%s", start); free(buf); } static void show_error(const char *file, const char *type) { struct stat st; char *path = strdup(file); char *p; int ret; if (!path) die("Could not allocate memory"); p = strstr(path, "tracing"); if (p) { if (strncmp(p + sizeof("tracing"), "instances", sizeof("instances") - 1) == 0) { p = strstr(p + sizeof("tracing") + sizeof("instances"), "/"); if (!p) goto read_file; } else { p += sizeof("tracing") - 1; } ret = asprintf(&p, "%.*s/error_log", (int)(p - path), path); if (ret < 0) die("Could not allocate memory"); ret = stat(p, &st); if (ret < 0) { free(p); goto read_file; } read_error_log(p); goto out; } read_file: p = read_file(path); if (p) printf("%s", p); out: printf("Failed %s of %s\n", type, file); free(path); return; } static void write_filter(const char *file, const char *filter) { if (write_file(file, filter) < 0) show_error(file, "filter"); } static void clear_filter(const char *file) { write_filter(file, "0"); } static void write_trigger(const char *file, const char *trigger) { if (write_file(file, trigger) < 0) show_error(file, "trigger"); } static int clear_trigger(const char *file) { char trigger[BUFSIZ]; char *save = NULL; char *line; char *buf; int len; int ret; buf = read_file(file); if (!buf) { perror(file); return 0; } trigger[0] = '!'; for 
(line = strtok_r(buf, "\n", &save); line; line = strtok_r(NULL, "\n", &save)) {
		/* Lines starting with '#' are comments, not triggers */
		if (line[0] == '#')
			continue;
		len = strlen(line);
		/* Leave room for the leading '!' and the terminating NUL */
		if (len > BUFSIZ - 2)
			len = BUFSIZ - 2;
		strncpy(trigger + 1, line, len);
		trigger[len + 1] = '\0';
		/*
		 * We don't want any filters or extra on the line.
		 * strtok() cuts the string at the first space; the outer
		 * loop uses strtok_r() with its own state, so it is not
		 * disturbed.
		 */
		strtok(trigger, " ");
		write_file(file, trigger);
	}

	free(buf);

	/*
	 * Some triggers have an order in removing them.
	 * They will not be removed if done in the wrong order.
	 */
	buf = read_file(file);
	if (!buf)
		return 0;

	/* Report whether any non-comment line is still present */
	ret = 0;
	for (line = strtok(buf, "\n"); line; line = strtok(NULL, "\n")) {
		if (line[0] == '#')
			continue;
		ret = 1;
		break;
	}
	free(buf);
	return ret;
}

/*
 * Clear the function filter file @file.
 *
 * The file is first truncated, then every non-comment line that is still
 * listed is written back with a leading '!' to remove it. Returns
 * silently if @file does not exist; dies if it can not be opened for
 * writing.
 */
static void clear_func_filter(const char *file)
{
	char filter[BUFSIZ];
	struct stat st;
	char *line;
	char *buf;
	char *p;
	int len;
	int ret;
	int fd;

	/* Function filters may not exist */
	ret = stat(file, &st);
	if (ret < 0)
		return;

	/*  First zero out normal filters */
	fd = open(file, O_WRONLY | O_TRUNC);
	if (fd < 0)
		die("opening to '%s'", file);
	close(fd);

	buf = read_file(file);
	if (!buf) {
		perror(file);
		return;
	}

	/* Now remove filters */
	filter[0] = '!';

	/*
	 * To delete a filter, we need to write a '!filter'
	 * to the file for each filter.
	 */
	for (line = strtok(buf, "\n"); line; line = strtok(NULL, "\n")) {
		if (line[0] == '#')
			continue;
		len = strlen(line);
		/* Leave room for the leading '!' and the terminating NUL */
		if (len > BUFSIZ - 2)
			len = BUFSIZ - 2;

		strncpy(filter + 1, line, len);
		filter[len + 1] = '\0';
		/*
		 * To remove "unlimited" filters, we must remove
		 * the ":unlimited" from what we write.
*/ if ((p = strstr(filter, ":unlimited"))) { *p = '\0'; len = p - filter; } /* * The write to this file expects white space * at the end :-p */ filter[len] = '\n'; filter[len+1] = '\0'; write_file(file, filter); } free(buf); } static void update_reset_triggers(void) { struct reset_file *reset; while (reset_triggers) { reset = reset_triggers; reset_triggers = reset->next; clear_trigger(reset->path); free(reset->path); free(reset); } } static void reset_buffer_files_instance(struct buffer_instance *instance) { if (instance->old_buffer_size != instance->buffer_size) tracefs_instance_set_buffer_size(instance->tracefs, instance->old_buffer_size, -1); if (instance->old_subbuf_size != instance->subbuf_size) tracefs_instance_set_subbuf_size(instance->tracefs, instance->old_subbuf_size); } static void reset_buffer_files(void) { struct buffer_instance *instance; for_all_instances(instance) { reset_buffer_files_instance(instance); } } static void update_reset_files(void) { struct reset_file *reset; while (reset_files) { reset = reset_files; reset_files = reset->next; if (!keep) write_file(reset->path, reset->reset); free(reset->path); free(reset->reset); free(reset); } reset_buffer_files(); } static void update_event(struct event_list *event, const char *filter, int filter_only, char update) { const char *name = event->event; FILE *fp; char *path; int ret; if (use_old_event_method()) { if (filter_only) return; old_update_events(name, update); return; } if (filter && event->filter_file) { add_reset_file(event->filter_file, "0", RESET_DEFAULT_PRIO); write_filter(event->filter_file, filter); } if (event->trigger_file) { add_reset_trigger(event->trigger_file); clear_trigger(event->trigger_file); write_trigger(event->trigger_file, event->trigger); /* Make sure we don't write this again */ free(event->trigger_file); free(event->trigger); event->trigger_file = NULL; event->trigger = NULL; } if (filter_only || !event->enable_file) return; path = event->enable_file; fp = fopen(path, 
"w"); if (!fp) die("writing to '%s'", path); ret = fwrite(&update, 1, 1, fp); fclose(fp); if (ret < 0) die("writing to '%s'", path); } /* * The debugfs file tracing_enabled needs to be deprecated. * But just in case anyone fiddled with it. If it exists, * make sure it is one. * No error checking needed here. */ static void check_tracing_enabled(void) { static int fd = -1; char *path; if (fd < 0) { path = tracefs_get_tracing_file("tracing_enabled"); fd = open(path, O_WRONLY | O_CLOEXEC); tracefs_put_tracing_file(path); if (fd < 0) return; } write(fd, "1", 1); } static int open_instance_fd(struct buffer_instance *instance, const char *file, int flags) { int fd; char *path; path = tracefs_instance_get_file(instance->tracefs, file); fd = open(path, flags); if (fd < 0) { /* instances may not be created yet */ if (is_top_instance(instance)) die("opening '%s'", path); } tracefs_put_tracing_file(path); return fd; } static int open_tracing_on(struct buffer_instance *instance) { int fd = instance->tracing_on_fd; /* OK, we keep zero for stdin */ if (fd > 0) return fd; fd = open_instance_fd(instance, "tracing_on", O_RDWR | O_CLOEXEC); if (fd < 0) { return fd; } instance->tracing_on_fd = fd; return fd; } static void write_tracing_on(struct buffer_instance *instance, int on) { int ret; int fd; if (is_guest(instance)) return; fd = open_tracing_on(instance); if (fd < 0) return; if (on) ret = write(fd, "1", 1); else ret = write(fd, "0", 1); if (ret < 0) die("writing 'tracing_on'"); } static int read_tracing_on(struct buffer_instance *instance) { int fd; char buf[10]; int ret; if (is_guest(instance)) return -1; fd = open_tracing_on(instance); if (fd < 0) return fd; ret = read(fd, buf, 10); if (ret <= 0) die("Reading 'tracing_on'"); buf[9] = 0; ret = atoi(buf); return ret; } static void reset_max_latency_instance(void) { struct buffer_instance *instance; for_all_instances(instance) reset_max_latency(instance); } void tracecmd_enable_tracing(void) { struct buffer_instance *instance; 
check_tracing_enabled();

	/* Turn tracing on in every instance */
	for_all_instances(instance)
		write_tracing_on(instance, 1);

	if (latency)
		reset_max_latency_instance();
}

/* Write 0 to "tracing_on" of every instance. */
void tracecmd_disable_tracing(void)
{
	struct buffer_instance *instance;

	for_all_instances(instance)
		write_tracing_on(instance, 0);
}

/*
 * Stop tracing and undo the tracing setup.
 * @disable_tracer: when non-zero the current tracer is set to "nop"
 *
 * Tracing is turned off, function stack tracing is disabled, the events
 * are reset, the ftrace pid file of every instance is updated with an
 * empty string, and the trace instances are cleared.
 */
void tracecmd_disable_all_tracing(int disable_tracer)
{
	struct buffer_instance *instance;

	tracecmd_disable_tracing();

	disable_func_stack_trace();

	if (disable_tracer)
		set_plugin("nop");

	reset_events();

	/* Force close and reset of ftrace pid file */
	for_all_instances(instance)
		update_ftrace_pid(instance, "", 1);

	clear_trace_instances();
}

/*
 * Rebuild the pid filter of a sched event for @field via
 * make_pid_filter(). A NULL @event is ignored.
 */
static void
update_sched_event(struct buffer_instance *instance,
		   struct event_list *event, const char *field)
{
	if (!event)
		return;

	event->pid_filter = make_pid_filter(instance, event->pid_filter, field);
}

/*
 * Combine the user filter, the common pid filter and the per-event pid
 * filter of every non-negated event of @instance and write the result
 * with update_event() (filter only, the enable file is not touched).
 */
static void update_event_filters(struct buffer_instance *instance)
{
	struct event_list *event;
	char *event_filter;
	int free_it;
	int len;
	int common_len = 0;

	if (instance->common_pid_filter)
		common_len = strlen(instance->common_pid_filter);

	for (event = instance->events; event; event = event->next) {
		if (!event->neg) {

			/* Set when event_filter was allocated in this pass */
			free_it = 0;
			if (event->filter) {
				if (!instance->common_pid_filter)
					/*
					 * event->pid_filter is only created if
					 * common_pid_filter is. No need to check that.
					 * Just use the current event->filter.
*/ event_filter = event->filter; else if (event->pid_filter) { free_it = 1; len = common_len + strlen(event->pid_filter) + strlen(event->filter) + strlen("()&&(||)") + 1; event_filter = malloc(len); if (!event_filter) die("Failed to allocate event_filter"); sprintf(event_filter, "(%s)&&(%s||%s)", event->filter, instance->common_pid_filter, event->pid_filter); } else { free_it = 1; len = common_len + strlen(event->filter) + strlen("()&&()") + 1; event_filter = malloc(len); if (!event_filter) die("Failed to allocate event_filter"); sprintf(event_filter, "(%s)&&(%s)", event->filter, instance->common_pid_filter); } } else { /* event->pid_filter only exists when common_pid_filter does */ if (!instance->common_pid_filter) continue; if (event->pid_filter) { free_it = 1; len = common_len + strlen(event->pid_filter) + strlen("||") + 1; event_filter = malloc(len); if (!event_filter) die("Failed to allocate event_filter"); sprintf(event_filter, "%s||%s", instance->common_pid_filter, event->pid_filter); } else event_filter = instance->common_pid_filter; } update_event(event, event_filter, 1, '1'); if (free_it) free(event_filter); } } } static void update_pid_filters(struct buffer_instance *instance) { struct filter_pids *p; char *filter; char *str; int len; int ret; int fd; if (is_guest(instance)) return; fd = open_instance_fd(instance, "set_event_pid", O_WRONLY | O_CLOEXEC | O_TRUNC); if (fd < 0) die("Failed to access set_event_pid"); len = instance->len_filter_pids + instance->nr_filter_pids; filter = malloc(len); if (!filter) die("Failed to allocate pid filter"); str = filter; for (p = instance->filter_pids; p; p = p->next) { if (p->exclude) continue; len = sprintf(str, "%d ", p->pid); str += len; } if (filter == str) goto out; len = str - filter; str = filter; do { ret = write(fd, str, len); if (ret < 0) die("Failed to write to set_event_pid"); str += ret; len -= ret; } while (ret >= 0 && len); out: free(filter); close(fd); } static void update_pid_event_filters(struct 
buffer_instance *instance) { if (instance->have_set_event_pid) return update_pid_filters(instance); /* * Also make sure that the sched_switch to this pid * and wakeups of this pid are also traced. * Only need to do this if the events are active. */ update_sched_event(instance, instance->sched_switch_event, "next_pid"); update_sched_event(instance, instance->sched_wakeup_event, "pid"); update_sched_event(instance, instance->sched_wakeup_new_event, "pid"); update_event_filters(instance); } #define MASK_STR_MAX 4096 /* Don't expect more than 32768 CPUS */ static char *alloc_mask_from_hex(struct buffer_instance *instance, const char *str) { char *cpumask; if (strcmp(str, "-1") == 0) { /* set all CPUs */ int bytes = (instance->cpu_count + 7) / 8; int last = instance->cpu_count % 8; int i; cpumask = malloc(MASK_STR_MAX); if (!cpumask) die("can't allocate cpumask"); if (bytes > (MASK_STR_MAX-1)) { warning("cpumask can't handle more than 32768 CPUS!"); bytes = MASK_STR_MAX-1; } sprintf(cpumask, "%x", (1 << last) - 1); for (i = 1; i < bytes; i++) cpumask[i] = 'f'; cpumask[i+1] = 0; } else { cpumask = strdup(str); if (!cpumask) die("can't allocate cpumask"); } return cpumask; } static void set_mask(struct buffer_instance *instance) { struct stat st; char *path; int fd; int ret; if (is_guest(instance)) return; if (!instance->cpumask) return; path = tracefs_instance_get_file(instance->tracefs, "tracing_cpumask"); if (!path) die("could not allocate path"); reset_save_file(path, RESET_DEFAULT_PRIO); ret = stat(path, &st); if (ret < 0) { warning("%s not found", path); goto out; } fd = open(path, O_WRONLY | O_TRUNC); if (fd < 0) die("could not open %s\n", path); write(fd, instance->cpumask, strlen(instance->cpumask)); close(fd); out: tracefs_put_tracing_file(path); free(instance->cpumask); instance->cpumask = NULL; } static void enable_events(struct buffer_instance *instance) { struct event_list *event; if (is_guest(instance)) return; for (event = instance->events; event; event = 
event->next) { if (!event->neg) update_event(event, event->filter, 0, '1'); } /* Now disable any events */ for (event = instance->events; event; event = event->next) { if (event->neg) update_event(event, NULL, 0, '0'); } } void tracecmd_enable_events(void) { enable_events(first_instance); } static void set_clock(struct common_record_context *ctx, struct buffer_instance *instance) { const char *clock; char *path; char *content; char *str; if (is_guest(instance)) return; if (instance->clock) clock = instance->clock; else clock = ctx->clock; if (!clock) return; /* The current clock is in brackets, reset it when we are done */ content = tracefs_instance_file_read(instance->tracefs, "trace_clock", NULL); /* check if first clock is set */ if (*content == '[') str = strtok(content+1, "]"); else { str = strtok(content, "["); if (!str) die("Can not find clock in trace_clock"); str = strtok(NULL, "]"); } path = tracefs_instance_get_file(instance->tracefs, "trace_clock"); add_reset_file(path, str, RESET_DEFAULT_PRIO); free(content); tracefs_put_tracing_file(path); tracefs_instance_file_write(instance->tracefs, "trace_clock", clock); } static void set_max_graph_depth(struct buffer_instance *instance, char *max_graph_depth) { char *path; int ret; if (is_guest(instance)) return; path = tracefs_instance_get_file(instance->tracefs, "max_graph_depth"); reset_save_file(path, RESET_DEFAULT_PRIO); tracefs_put_tracing_file(path); ret = tracefs_instance_file_write(instance->tracefs, "max_graph_depth", max_graph_depth); if (ret < 0) die("could not write to max_graph_depth"); } static bool check_file_in_dir(char *dir, char *file) { struct stat st; char *path; int ret; ret = asprintf(&path, "%s/%s", dir, file); if (ret < 0) die("Failed to allocate id file path for %s/%s", dir, file); ret = stat(path, &st); free(path); if (ret < 0 || S_ISDIR(st.st_mode)) return false; return true; } /** * create_event - create and event descriptor * @instance: instance to use * @path: path to event 
attribute * @old_event: event descriptor to use as base * * NOTE: the function purpose is to create a data structure to describe * an ftrace event. During the process it becomes handy to change the * string `path`. So, do not rely on the content of `path` after you * invoke this function. */ static struct event_list * create_event(struct buffer_instance *instance, char *path, struct event_list *old_event) { struct event_list *event; struct stat st; char *path_dirname; char *p; int ret; event = malloc(sizeof(*event)); if (!event) die("Failed to allocate event"); *event = *old_event; add_event(instance, event); if (event->filter || filter_task || instance->filter_pids) { event->filter_file = strdup(path); if (!event->filter_file) die("malloc filter file"); } path_dirname = dirname(path); ret = asprintf(&p, "%s/enable", path_dirname); if (ret < 0) die("Failed to allocate enable path for %s", path); ret = stat(p, &st); if (ret >= 0) event->enable_file = p; else free(p); if (old_event->trigger) { if (check_file_in_dir(path_dirname, "trigger")) { event->trigger = strdup(old_event->trigger); ret = asprintf(&p, "%s/trigger", path_dirname); if (ret < 0) die("Failed to allocate trigger path for %s", path); event->trigger_file = p; } else { /* Check if this is event or system. 
* Systems do not have trigger files by design
			 */
			if (check_file_in_dir(path_dirname, "id"))
				die("trigger specified but not supported by this kernel");
		}
	}

	return event;
}

/*
 * Create the descriptor of the sched event @sched_path unless *@event is
 * already set.
 * @instance: instance the event is added to
 * @event: where the new descriptor is stored
 * @sched: descriptor whose filter_file locates the sched system
 * @sched_path: event name inside that system directory
 *
 * The filter path is built as "<dirname of sched->filter_file>/
 * <sched_path>/filter" and handed to create_event() with @sched as base.
 */
static void make_sched_event(struct buffer_instance *instance,
			     struct event_list **event, struct event_list *sched,
			     const char *sched_path)
{
	char *path_dirname;
	char *tmp_file;
	char *path;
	int ret;

	/* Do nothing if the event already exists */
	if (*event)
		return;

	/* we do not want to corrupt sched->filter_file when using dirname() */
	tmp_file = strdup(sched->filter_file);
	if (!tmp_file)
		die("Failed to allocate path for %s", sched_path);
	path_dirname = dirname(tmp_file);

	ret = asprintf(&path, "%s/%s/filter", path_dirname, sched_path);
	free(tmp_file);
	if (ret < 0)
		die("Failed to allocate path for %s", sched_path);

	*event = create_event(instance, path, sched);
	free(path);
}

/*
 * Store @event in *@save if @path ends with @name.
 * @len: length of @path
 *
 * NOTE(review): @len is assumed to be at least strlen(@name); a shorter
 * @path would move the pointer in front of the string - confirm that no
 * caller can pass one.
 */
static void test_event(struct event_list *event, const char *path,
		       const char *name, struct event_list **save, int len)
{
	/* Point at the last strlen(name) characters of path */
	path += len - strlen(name);

	if (strcmp(path, name) != 0)
		return;

	*save = event;
}

static void print_event(const char *fmt, ...)
{
	va_list ap;

	/* Print the formatted message plus a newline, only if show_status is set */
	if (!show_status)
		return;

	va_start(ap, fmt);
	vprintf(fmt, ap);
	va_end(ap);

	printf("\n");
}

/*
 * Create an event descriptor for every "events/@file/filter" path that
 * matches in @instance (@file may contain glob characters).
 * @old_event: descriptor used as base for the created ones
 *
 * While walking the matches, the sched system and the sched_switch,
 * sched_wakeup and sched_wakeup_new events are remembered; if the sched
 * system matched and has a filter file, the three sched events are
 * created when they are still missing.
 *
 * Returns non-zero when NO event was added (the event list tail did not
 * move) and zero when at least one was.
 */
static int expand_event_files(struct buffer_instance *instance,
			      const char *file, struct event_list *old_event)
{
	struct event_list **save_event_tail = instance->event_next;
	struct event_list *sched_event = NULL;
	struct event_list *event;
	glob_t globbuf;
	char *path;
	char *p;
	int ret;
	int i;

	ret = asprintf(&p, "events/%s/filter", file);
	if (ret < 0)
		die("Failed to allocate event filter path for %s", file);

	path = tracefs_instance_get_file(instance->tracefs, p);

	globbuf.gl_offs = 0;
	ret = glob(path, 0, NULL, &globbuf);
	tracefs_put_tracing_file(path);
	free(p);

	/*
	 * NOTE(review): glob() reports failures through the GLOB_* codes;
	 * if those are never negative this check can not trigger and a
	 * missing match simply leaves the loop below without work - confirm.
	 */
	if (ret < 0)
		die("No filters found");

	for (i = 0; i < globbuf.gl_pathc; i++) {
		int len;

		path = globbuf.gl_pathv[i];

		event = create_event(instance, path, old_event);
		print_event("%s\n", path);

		/* Taken after create_event(), which may have changed path */
		len = strlen(path);

		test_event(event, path, "sched", &sched_event, len);
		test_event(event, path, "sched/sched_switch", &instance->sched_switch_event, len);
		test_event(event, path, "sched/sched_wakeup_new", &instance->sched_wakeup_new_event, len);
		test_event(event, path, "sched/sched_wakeup", &instance->sched_wakeup_event, len);
	}

	if (sched_event && sched_event->filter_file) {
		/* make sure all sched events exist */
		make_sched_event(instance, &instance->sched_switch_event,
				 sched_event, "sched_switch");
		make_sched_event(instance, &instance->sched_wakeup_event,
				 sched_event, "sched_wakeup");
		make_sched_event(instance, &instance->sched_wakeup_new_event,
				 sched_event, "sched_wakeup_new");

	}

	globfree(&globbuf);

	/* If the event list tail changed, that means events were added */
	return save_event_tail == instance->event_next;
}

/*
 * Expand "@system_name/@event_name" with expand_event_files() and return
 * its result.
 */
static int expand_events_all(struct buffer_instance *instance,
			     char *system_name, char *event_name,
			     struct event_list *event)
{
	char *name;
	int ret;

	ret = asprintf(&name, "%s/%s", system_name, event_name);
	if (ret < 0)
		die("Failed to allocate system/event for %s/%s",
		     system_name, event_name);
	ret =
expand_event_files(instance, name, event);
	free(name);

	return ret;
}

/*
 * Expand one user supplied event specification into real events.
 *
 * "all" selects every event. "system:event" selects inside one system;
 * an empty event part means every event of that system. A name without
 * ':' is tried as a system and as an event of any system (and, when a
 * trigger is attached, as "name/<anything>" as well).
 *
 * Dies if nothing matched, unless ignore_event_not_found is set.
 */
static void expand_event(struct buffer_instance *instance, struct event_list *event)
{
	const char *name = event->event;
	char *str;
	char *ptr;
	int ret;

	/*
	 * We allow the user to use "all" to enable all events.
	 * Expand event_selection to all systems.
	 */
	if (strcmp(name, "all") == 0) {
		expand_event_files(instance, "*", event);
		return;
	}

	str = strdup(name);
	if (!str)
		die("Failed to allocate %s string", name);

	ptr = strchr(str, ':');
	if (ptr) {
		/* Split "system:event" in place */
		*ptr = '\0';
		ptr++;

		if (strlen(ptr))
			ret = expand_events_all(instance, str, ptr, event);
		else
			ret = expand_events_all(instance, str, "*", event);

		if (!ignore_event_not_found && ret)
			die("No events enabled with %s", name);

		goto out;
	}

	/* No ':' so enable all matching systems and events */
	/* ret stays non-zero only if every expansion added nothing */
	ret = expand_event_files(instance, str, event);
	ret &= expand_events_all(instance, "*", str, event);
	if (event->trigger)
		ret &= expand_events_all(instance, str, "*", event);

	if (!ignore_event_not_found && ret)
		die("No events enabled with %s", name);

out:
	free(str);
}

/*
 * Replace the event list of @instance by its expansion: the list is
 * reset, every old entry is expanded with expand_event() and then freed
 * together with its trigger string. Guests are skipped.
 */
static void expand_event_instance(struct buffer_instance *instance)
{
	struct event_list *compressed_list = instance->events;
	struct event_list *event;

	if (is_guest(instance))
		return;

	reset_event_list(instance);

	while (compressed_list) {
		event = compressed_list;
		compressed_list = event->next;
		expand_event(instance, event);
		free(event->trigger);
		free(event);
	}
}

/* Expand the events of every instance; nothing to do with the old event method. */
static void expand_event_list(void)
{
	struct buffer_instance *instance;

	if (use_old_event_method())
		return;

	for_all_instances(instance)
		expand_event_instance(instance);
}

/*
 * Ask the recording loop to stop: the sleep time is cleared, the
 * recorder (if any) is told to stop, and "finished" is set.
 */
static void finish(void)
{
	static int secs = 1;

	sleep_time = 0;

	/* all done */
	if (recorder) {
		tracecmd_stop_recording(recorder);
		/*
		 * We could just call the alarm if the above returned non zero,
		 * as zero is supposed to guarantee that the reader woke up.
		 * But as this is called from a signal handler, that may not
		 * necessarily be the case.
*/
		alarm(secs++);
	}
	finished = 1;
}

/* Flush the current recorder, if there is one. */
static void flush(void)
{
	if (recorder)
		tracecmd_flush_recording(recorder, false);
}

/*
 * Signal handler of the recorder processes: SIGALRM, SIGUSR1, SIGINT and
 * SIGTERM stop the recording, SIGUSR2 flushes it.
 */
static void do_sig(int sig)
{
	/*
	 * The handlers return void; call them as plain statements since
	 * ISO C forbids "return expr;" in a function returning void.
	 */
	switch (sig) {
	case SIGALRM:
	case SIGUSR1:
	case SIGINT:
	case SIGTERM:
		finish();
		break;
	case SIGUSR2:
		flush();
		break;
	}
}

/*
 * Resolve @host and @port with getaddrinfo().
 * @type: USE_TCP asks for stream sockets, anything else for datagrams
 *
 * Returns the result list (to be released with freeaddrinfo()) or NULL,
 * in which case gai_err holds the error string.
 */
static struct addrinfo *do_getaddrinfo(const char *host, unsigned int port,
				       enum port_type type)
{
	struct addrinfo *results;
	struct addrinfo hints;
	char buf[BUFSIZ];
	int s;

	snprintf(buf, BUFSIZ, "%u", port);

	memset(&hints, 0, sizeof(hints));
	hints.ai_family = AF_UNSPEC;
	hints.ai_socktype = type == USE_TCP ? SOCK_STREAM : SOCK_DGRAM;

	s = getaddrinfo(host, buf, &hints, &results);
	if (s != 0) {
		gai_err = gai_strerror(s);
		return NULL;
	}

	/* port is unsigned, print it as such */
	dprint("Attached port %s: %u to results: %p\n",
	       type == USE_TCP ? "TCP" : "UDP", port, results);

	return results;
}

/*
 * Connect to the first address of @results that accepts the connection.
 * Returns the connected socket, or -1 if none worked.
 */
static int connect_addr(struct addrinfo *results)
{
	struct addrinfo *rp;
	int sfd = -1;

	for (rp = results; rp != NULL; rp = rp->ai_next) {
		sfd = socket(rp->ai_family, rp->ai_socktype,
			     rp->ai_protocol);
		if (sfd == -1)
			continue;

		set_tcp_no_delay(sfd, rp->ai_socktype);
		if (connect(sfd, rp->ai_addr, rp->ai_addrlen) != -1)
			break;
		close(sfd);
	}

	/* The list was exhausted without a successful connect() */
	if (rp == NULL)
		return -1;

	dprint("connect results: %p with fd: %d\n", results, sfd);

	return sfd;
}

/*
 * Connect to @port of @host and return the socket.
 * For USE_VSOCK, @host is the CID in decimal and the result of
 * trace_vsock_open() is returned as is. For TCP/UDP any failure is fatal.
 */
static int connect_port(const char *host, unsigned int port, enum port_type type)
{
	struct addrinfo *results;
	int sfd;

	if (type == USE_VSOCK)
		return trace_vsock_open(atoi(host), port);

	results = do_getaddrinfo(host, port, type);

	if (!results)
		die("connecting to %s server %s:%u",
		    type == USE_TCP ? "TCP" : "UDP", host, port);

	sfd = connect_addr(results);

	freeaddrinfo(results);

	if (sfd < 0)
		die("Can not connect to %s server %s:%u",
		    type == USE_TCP ? "TCP" : "UDP", host, port);

	return sfd;
}

/*
 * Accept one connection on @sd, retrying when interrupted by a signal.
 * Dies on any other error.
 */
static int do_accept(int sd)
{
	int cd;

	for (;;) {
		dprint("Wait on accept: %d\n", sd);
		cd = accept(sd, NULL, NULL);
		dprint("accepted: %d\n", cd);
		if (cd < 0) {
			if (errno == EINTR)
				continue;
			die("accept");
		}

		return cd;
	}

	return -1;
}

/*
 * Parse a guest specification of the form "name[@cid][:port]", "cid" or
 * an internet address.
 * @gname: specification; it is modified in place
 * @cid: set to the CID, or -1 if none could be determined
 * @port: set to the port, or -1 if none was given
 * @res: set to the resolved address list for internet addresses, else NULL
 *
 * Returns the guest name (the name of a known guest, or @gname for an
 * internet address) or NULL if the guest is unknown and does not resolve.
 */
static char *parse_guest_name(char *gname, int *cid, int *port,
			      struct addrinfo **res)
{
	struct trace_guest *guest = NULL;
	struct addrinfo *result;
	char *ip = NULL;
	char *p;

	*res = NULL;

	*port = -1;
	/* Look for the last ':' that is not the first character */
	for (p = gname + strlen(gname); p > gname; p--) {
		if (*p == ':')
			break;
	}
	if (p > gname) {
		*p = '\0';
		*port = atoi(p + 1);
	}

	*cid = -1;
	p = strrchr(gname, '@');
	if (p) {
		*p = '\0';
		*cid = atoi(p + 1);
	} else if (is_digits(gname)) {
		*cid = atoi(gname);
	} else {
		/* Check if this is an IP address */
		if (strstr(gname, ":") || strstr(gname, "."))
			ip = gname;
	}

	if (!ip && *cid < 0)
		read_qemu_guests();

	if (!ip)
		guest = trace_get_guest(*cid, gname);
	if (guest) {
		*cid = guest->cid;
		return guest->name;
	}

	/* Test to see if this is an internet address */
	result = do_getaddrinfo(gname, *port, USE_TCP);
	if (!result)
		return NULL;

	*res = result;

	return gname;
}

/* Switch the calling process to SCHED_FIFO with priority @prio; warn on failure. */
static void set_prio(int prio)
{
	struct sched_param sp;

	memset(&sp, 0, sizeof(sp));
	sp.sched_priority = prio;
	if (sched_setscheduler(0, SCHED_FIFO, &sp) < 0)
		warning("failed to set priority");
}

/*
 * Create a recorder for @cpu of @instance that writes into the pipe
 * @brass. The read end (brass[0]) is closed first.
 */
static struct tracecmd_recorder *
create_recorder_instance_pipe(struct buffer_instance *instance,
			      int cpu, int *brass)
{
	struct tracecmd_recorder *recorder;
	unsigned flags = recorder_flags | TRACECMD_RECORD_BLOCK_SPLICE;

	/* This is already the child */
	close(brass[0]);

	recorder = tracecmd_create_buffer_recorder_fd(brass[1], cpu, flags, instance->tracefs);

	return recorder;
}

/*
 * Create the recorder for @cpu of @instance.
 * @file: output file of the recorder
 * @brass: pipe to record into, or NULL
 *
 * Guests are read through a fifo, a network connection or a vsocket.
 */
static struct tracecmd_recorder *
create_recorder_instance(struct buffer_instance *instance, const char *file, int cpu,
			 int *brass)
{
	struct tracecmd_recorder *record;
	struct addrinfo *result;

	if (is_guest(instance)) {
		int fd;
		unsigned int flags;

		if (instance->use_fifos)
			fd = instance->fds[cpu];
else if (is_network(instance)) {
			result = do_getaddrinfo(instance->name,
						instance->client_ports[cpu],
						instance->port_type);
			/*
			 * NOTE(review): the port is printed with %d although
			 * do_getaddrinfo() takes it as unsigned int - confirm
			 * the type of client_ports.
			 */
			if (!result)
				die("Failed to connect to %s port %d\n",
				    instance->name,
				    instance->client_ports[cpu]);
			fd = connect_addr(result);
			freeaddrinfo(result);
		} else
			fd = trace_vsock_open(instance->cid, instance->client_ports[cpu]);
		if (fd < 0)
			die("Failed to connect to agent");

		/* Pick the recorder flags that match the transport */
		flags = recorder_flags;
		if (instance->use_fifos)
			flags |= TRACECMD_RECORD_NOBRASS;
		else if (!trace_vsock_can_splice_read())
			flags |= TRACECMD_RECORD_NOSPLICE;
		return tracecmd_create_recorder_virt(file, cpu, flags, fd, max_kb);
	}

	if (brass)
		return create_recorder_instance_pipe(instance, cpu, brass);

	/* An instance without a name uses the plain (non-buffer) recorder */
	if (!tracefs_instance_get_name(instance->tracefs))
		return tracecmd_create_recorder_maxkb(file, cpu, recorder_flags, max_kb);

	record = tracecmd_create_buffer_recorder_maxkb(file, cpu, recorder_flags,
						       instance->tracefs, max_kb);
	return record;
}

/*
 * If extract is set, then this is going to set up the recorder,
 * connections and exit as the tracing is serialized by a single thread.
*/ static int create_recorder(struct buffer_instance *instance, int cpu, enum trace_type type, int *brass) { struct tracefs_instance *recorder_instance = NULL; long ret; char *file; pid_t pid; if (type != TRACE_TYPE_EXTRACT) { pid = fork(); if (pid < 0) die("fork"); if (pid) return pid; signal(SIGINT, SIG_IGN); signal(SIGTERM, SIG_IGN); signal(SIGUSR1, do_sig); signal(SIGUSR2, do_sig); signal(SIGALRM, do_sig); if (rt_prio) set_prio(rt_prio); /* do not kill tasks on error */ instance->cpu_count = 0; } if ((instance->client_ports && !is_guest(instance)) || is_agent(instance)) { unsigned int flags = recorder_flags; int fd; if (is_agent(instance)) { if (instance->use_fifos) fd = instance->fds[cpu]; else { again: fd = do_accept(instance->fds[cpu]); if (instance->host && !trace_net_cmp_connection_fd(fd, instance->host)) { dprint("Client does not match '%s' for cpu:%d\n", instance->host, cpu); close(fd); goto again; } } } else { fd = connect_port(host, instance->client_ports[cpu], instance->port_type); } if (fd < 0) die("Failed connecting to client"); if (tracefs_instance_get_name(instance->tracefs) && !is_agent(instance)) recorder_instance = instance->tracefs; recorder = tracecmd_create_buffer_recorder_fd(fd, cpu, flags, recorder_instance); } else { file = get_temp_file(instance, cpu); recorder = create_recorder_instance(instance, file, cpu, brass); put_temp_file(file); } if (!recorder) die ("can't create recorder"); if (type == TRACE_TYPE_EXTRACT) { ret = tracecmd_flush_recording(recorder, true); tracecmd_free_recorder(recorder); recorder = NULL; return ret; } while (!finished) { if (tracecmd_start_recording(recorder, sleep_time) < 0) break; } tracecmd_free_recorder(recorder); recorder = NULL; exit(0); } static void check_first_msg_from_server(struct tracecmd_msg_handle *msg_handle) { char buf[BUFSIZ]; read(msg_handle->fd, buf, 8); /* Make sure the server is the tracecmd server */ if (memcmp(buf, "tracecmd", 8) != 0) die("server not tracecmd server"); } static void 
communicate_with_listener_v1(struct tracecmd_msg_handle *msg_handle,
			     struct buffer_instance *instance)
{
	unsigned int *client_ports;
	char buf[BUFSIZ];
	ssize_t n;
	int cpu, i;

	/*
	 * v1 handshake: after the greeting of the server, send the CPU
	 * count, the page size and the options as NUL terminated ASCII
	 * strings, then read one port per CPU and store the array in
	 * instance->client_ports.
	 */
	check_first_msg_from_server(msg_handle);

	/* write the number of CPUs we have (in ASCII) */
	sprintf(buf, "%d", local_cpu_count);

	/* include \0 */
	write(msg_handle->fd, buf, strlen(buf)+1);

	/* write the pagesize (in ASCII) */
	sprintf(buf, "%d", page_size);

	/* include \0 */
	write(msg_handle->fd, buf, strlen(buf)+1);

	/*
	 * If we are using IPV4 and our page size is greater than
	 * or equal to 64K, we need to punt and use TCP. :-(
	 */

	/* TODO, test for ipv4 */
	if (page_size >= UDP_MAX_PACKET) {
		warning("page size too big for UDP using TCP in live read");
		instance->port_type = USE_TCP;
		msg_handle->flags |= TRACECMD_MSG_FL_USE_TCP;
	}

	if (instance->port_type == USE_TCP) {
		/* Send one option */
		write(msg_handle->fd, "1", 2);
		/* Size 4 */
		write(msg_handle->fd, "4", 2);
		/* use TCP */
		write(msg_handle->fd, "TCP", 4);
	} else
		/* No options */
		write(msg_handle->fd, "0", 2);

	client_ports = malloc(local_cpu_count * sizeof(*client_ports));
	if (!client_ports)
		die("Failed to allocate client ports for %d cpus", local_cpu_count);

	/*
	 * Now we will receive back a comma delimited list
	 * of client ports to connect to.
*/
	for (cpu = 0; cpu < local_cpu_count; cpu++) {
		/* Read one byte at a time up to the ',' or NUL that ends a port */
		for (i = 0; i < BUFSIZ; i++) {
			n = read(msg_handle->fd, buf+i, 1);
			if (n != 1)
				die("Error, reading server ports");
			if (!buf[i] || buf[i] == ',')
				break;
		}
		if (i == BUFSIZ)
			die("read bad port number");
		buf[i] = 0;
		client_ports[cpu] = atoi(buf);
	}

	instance->client_ports = client_ports;
}

/*
 * v3 handshake: send the init data through
 * tracecmd_msg_send_init_data(), which is handed @client_ports. Dies on
 * failure.
 */
static void communicate_with_listener_v3(struct tracecmd_msg_handle *msg_handle,
					 unsigned int **client_ports)
{
	if (tracecmd_msg_send_init_data(msg_handle, client_ports) < 0)
		die("Cannot communicate with server");
}

/*
 * Find out which protocol the server speaks. msg_handle->version is set
 * to V1_PROTOCOL when the server does not answer as a v3 server; the
 * function dies on an answer it does not understand.
 */
static void check_protocol_version(struct tracecmd_msg_handle *msg_handle)
{
	char buf[BUFSIZ];
	int fd = msg_handle->fd;
	int n;

	check_first_msg_from_server(msg_handle);

	/*
	 * Write the protocol version, the magic number, and the dummy
	 * option(0) (in ASCII). The client understands whether the server
	 * uses the v3 protocol or not by checking a reply message from the
	 * server. If the message is "V3", the server uses v3 protocol. On the
	 * other hand, if the message is just number strings, the server
	 * returned port numbers. So, in that case, the client understands the
	 * server uses the v1 protocol. However, the old server tells the
	 * client port numbers after reading cpu_count, page_size, and option.
	 * So, we add the dummy number (the magic number and 0 option) to the
	 * first client message.
*/
	write(fd, V3_CPU, sizeof(V3_CPU));

	buf[0] = 0;

	/* read a reply message, keeping room for a terminating NUL */
	n = read(fd, buf, BUFSIZ - 1);
	if (n > 0)
		buf[n] = 0;

	if (n < 0 || !buf[0]) {
		/* the server uses the v1 protocol, so we'll use it */
		msg_handle->version = V1_PROTOCOL;
		tracecmd_plog("Use the v1 protocol\n");
	} else {
		/*
		 * Never compare more bytes than the "V3" literal (with
		 * its NUL) holds; the old memcmp(buf, "V3", n) read past
		 * the literal for longer replies. The buffer is NUL
		 * terminated above so it is safe to print.
		 */
		size_t cmp_len = sizeof("V3");

		if ((size_t)n < cmp_len)
			cmp_len = n;
		if (memcmp(buf, "V3", cmp_len) != 0)
			die("Cannot handle the protocol %s", buf);
		/* OK, let's use v3 protocol */
		write(fd, V3_MAGIC, sizeof(V3_MAGIC));

		n = read(fd, buf, BUFSIZ - 1);
		if (n != 2 || memcmp(buf, "OK", 2) != 0) {
			if (n < 0)
				n = 0;
			buf[n] = 0;
			die("Cannot handle the protocol %s", buf);
		}
	}
}

/*
 * Open a vsocket to @vhost, which must look like "CID:PORT".
 * A copy of @vhost is stored in the global "host" (and cut at the ':').
 * Returns the result of trace_vsock_open().
 */
static int connect_vsock(char *vhost)
{
	char *cid;
	char *port;
	char *p;
	int sd;

	host = strdup(vhost);
	if (!host)
		die("alloctating server");

	cid = strtok_r(host, ":", &p);
	port = strtok_r(NULL, "", &p);

	if (!port)
		die("vsocket must have format of 'CID:PORT'");

	sd = trace_vsock_open(atoi(cid), atoi(port));

	return sd;
}

/*
 * Open a TCP connection to @thost, given as "HOST:PORT" or just "PORT"
 * (then the host is "localhost"). The host name ends up in the global
 * "host". Returns the connected socket; every failure is fatal.
 */
static int connect_ip(char *thost)
{
	struct addrinfo *result;
	int sfd;
	char *server;
	char *port;
	char *p;

	if (!strchr(thost, ':')) {
		server = strdup("localhost");
		if (!server)
			die("alloctating server");
		port = thost;
		host = server;
	} else {
		host = strdup(thost);
		if (!host)
			die("alloctating server");
		server = strtok_r(host, ":", &p);
		port = strtok_r(NULL, ":", &p);
		/* "host:" or ":port" leave one side out; atoi(NULL) would crash */
		if (!server || !port)
			die("network host must have format of 'HOST:PORT'");
	}

	result = do_getaddrinfo(server, atoi(port), USE_TCP);
	if (!result)
		die("getaddrinfo: %s", gai_err);

	sfd = connect_addr(result);

	freeaddrinfo(result);

	if (sfd < 0)
		die("Can not connect to %s:%s", server, port);

	return sfd;
}

/*
 * Connect to the server named by the global "host" and negotiate the
 * protocol. Returns the message handle, or NULL if no connection could
 * be made.
 */
static struct tracecmd_msg_handle *setup_network(struct buffer_instance *instance)
{
	struct tracecmd_msg_handle *msg_handle = NULL;
	enum port_type type = instance->port_type;
	/* Private copy: the connect helpers replace the global "host" */
	char *thost = strdup(host);
	int sfd;

	if (!thost)
		die("Failed to allocate host");
again:
	switch (type) {
	case USE_VSOCK:
		sfd = connect_vsock(thost);
		break;
	default:
		sfd = connect_ip(thost);
	}

	if (sfd < 0) {
		free(thost);
		return NULL;
	}

	if (msg_handle) {
		/* Reconnected for v1: reuse the handle with the new socket */
		msg_handle->fd = sfd;
	} else {
		msg_handle = tracecmd_msg_handle_alloc(sfd, 0);
		if (!msg_handle)
die("Failed to allocate message handle"); msg_handle->cpu_count = local_cpu_count; msg_handle->version = V3_PROTOCOL; } switch (type) { case USE_TCP: msg_handle->flags |= TRACECMD_MSG_FL_USE_TCP; break; case USE_VSOCK: msg_handle->flags |= TRACECMD_MSG_FL_USE_VSOCK; break; default: break; } if (msg_handle->version == V3_PROTOCOL) { check_protocol_version(msg_handle); if (msg_handle->version == V1_PROTOCOL) { /* reconnect to the server for using the v1 protocol */ close(sfd); free(host); host = NULL; goto again; } communicate_with_listener_v3(msg_handle, &instance->client_ports); } if (msg_handle->version == V1_PROTOCOL) communicate_with_listener_v1(msg_handle, instance); free(thost); return msg_handle; } static void add_options(struct tracecmd_output *handle, struct common_record_context *ctx); static struct tracecmd_output *create_net_output(struct common_record_context *ctx, struct tracecmd_msg_handle *msg_handle) { struct tracecmd_output *out; out = tracecmd_output_create(NULL); if (!out) return NULL; if (ctx->file_version && tracecmd_output_set_version(out, ctx->file_version)) goto error; if (tracecmd_output_set_msg(out, msg_handle)) goto error; if (ctx->compression) { if (tracecmd_output_set_compression(out, ctx->compression)) goto error; } else if (ctx->file_version >= FILE_VERSION_COMPRESSION) { tracecmd_output_set_compression(out, "any"); } if (tracecmd_output_write_headers(out, listed_events)) goto error; return out; error: tracecmd_output_close(out); return NULL; } static struct tracecmd_msg_handle * setup_connection(struct buffer_instance *instance, struct common_record_context *ctx) { struct tracecmd_msg_handle *msg_handle = NULL; struct tracecmd_output *network_handle = NULL; int ret; msg_handle = setup_network(instance); if (!msg_handle) die("Failed to make connection"); /* Now create the handle through this socket */ if (msg_handle->version == V3_PROTOCOL) { network_handle = create_net_output(ctx, msg_handle); if (!network_handle) goto error; 
tracecmd_set_quiet(network_handle, quiet); add_options(network_handle, ctx); ret = tracecmd_write_cmdlines(network_handle); if (ret) goto error; ret = tracecmd_write_cpus(network_handle, instance->cpu_count); if (ret) goto error; ret = tracecmd_write_buffer_info(network_handle); if (ret) goto error; ret = tracecmd_write_options(network_handle); if (ret) goto error; ret = tracecmd_msg_finish_sending_data(msg_handle); if (ret) goto error; } else { /* * V3 can handle compression, but V1 can not. * Set the file version back to 6. */ ctx->file_version = FILE_VERSION_MIN; ctx->compression = false; network_handle = tracecmd_output_create_fd(msg_handle->fd); if (!network_handle) goto error; if (tracecmd_output_set_version(network_handle, ctx->file_version)) goto error; if (tracecmd_output_write_headers(network_handle, listed_events)) goto error; tracecmd_set_quiet(network_handle, quiet); } instance->network_handle = network_handle; /* OK, we are all set, let'r rip! */ return msg_handle; error: if (msg_handle) tracecmd_msg_handle_close(msg_handle); if (network_handle) tracecmd_output_close(network_handle); return NULL; } static void finish_network(struct tracecmd_msg_handle *msg_handle) { if (msg_handle->version == V3_PROTOCOL) tracecmd_msg_send_close_msg(msg_handle); tracecmd_msg_handle_close(msg_handle); free(host); } static int open_guest_fifos(const char *guest, int **fds) { char path[PATH_MAX]; int i, fd, flags; for (i = 0; ; i++) { snprintf(path, sizeof(path), GUEST_FIFO_FMT ".out", guest, i); /* O_NONBLOCK so we don't wait for writers */ fd = open(path, O_RDONLY | O_NONBLOCK); if (fd < 0) break; /* Success, now clear O_NONBLOCK */ flags = fcntl(fd, F_GETFL); fcntl(fd, F_SETFL, flags & ~O_NONBLOCK); *fds = realloc(*fds, i + 1); (*fds)[i] = fd; } return i; } static bool clock_is_supported(struct tracefs_instance *instance, const char *clock); static int host_tsync(struct common_record_context *ctx, struct buffer_instance *instance, unsigned int tsync_port, char *proto) 
{ struct buffer_instance *iter_instance; int guest_id = -1; int fd; if (!proto) return -1; /* If connecting to a proxy, the clock may still need to be set */ if (strcmp(proto, "kvm") == 0 && clock_is_supported(NULL, TSC_CLOCK)) { ctx->clock = TSC_CLOCK; for_all_instances(iter_instance) { iter_instance->clock = TSC_CLOCK; set_clock(ctx, iter_instance); } } if (is_network(instance)) { fd = connect_port(instance->name, tsync_port, instance->port_type); } else { guest_id = instance->cid; fd = trace_vsock_open(instance->cid, tsync_port); } if (is_proxy(instance)) { instance->tsync = trace_tsync_as_guest(fd, proto, ctx->clock, guest_id, -1); } else { instance->tsync = trace_tsync_as_host(fd, top_instance.trace_id, instance->tsync_loop_interval, guest_id, instance->cpu_count, proto, ctx->clock); } return instance->tsync ? 0 : -1; } static void connect_to_agent(struct common_record_context *ctx, struct buffer_instance *instance) { struct tracecmd_tsync_protos *protos = NULL; int sd, ret, nr_fifos, nr_cpus, page_size; struct tracecmd_msg_handle *msg_handle; enum tracecmd_time_sync_role role; char *tsync_protos_reply = NULL; unsigned int tsync_port = 0; unsigned int *ports; int i, *fds = NULL; bool use_fifos = false; int siblings = 0; if (!no_fifos) { nr_fifos = open_guest_fifos(instance->name, &fds); use_fifos = nr_fifos > 0; } if (instance->result) { role = TRACECMD_TIME_SYNC_ROLE_CLIENT; sd = connect_addr(instance->result); if (sd < 0) die("Failed to connect to host %s:%u", instance->name, instance->port); } else { /* If connecting to a proxy, then this is the guest */ if (is_proxy(instance)) role = TRACECMD_TIME_SYNC_ROLE_GUEST; else role = TRACECMD_TIME_SYNC_ROLE_HOST; sd = trace_vsock_open(instance->cid, instance->port); if (sd < 0) die("Failed to connect to vsocket @%u:%u", instance->cid, instance->port); } msg_handle = tracecmd_msg_handle_alloc(sd, 0); if (!msg_handle) die("Failed to allocate message handle"); if (!instance->clock) instance->clock = 
tracefs_get_clock(NULL); if (instance->tsync_loop_interval >= 0) tracecmd_tsync_proto_getall(&protos, instance->clock, role); if (is_proxy(instance)) ret = tracecmd_msg_send_trace_proxy(msg_handle, instance->argc, instance->argv, use_fifos, top_instance.trace_id, protos, tracecmd_count_cpus(), siblings); else ret = tracecmd_msg_send_trace_req(msg_handle, instance->argc, instance->argv, use_fifos, top_instance.trace_id, protos); if (ret < 0) die("Failed to send trace %s", is_proxy(instance) ? "proxy" : "request"); if (protos) { free(protos->names); free(protos); } ret = tracecmd_msg_recv_trace_resp(msg_handle, &nr_cpus, &page_size, &ports, &use_fifos, &instance->trace_id, &tsync_protos_reply, &tsync_port); if (ret < 0) die("Failed to receive trace response %d", ret); if (tsync_protos_reply && tsync_protos_reply[0]) { if (tsync_proto_is_supported(tsync_protos_reply)) { printf("Negotiated %s time sync protocol with guest %s\n", tsync_protos_reply, instance->name); instance->cpu_count = nr_cpus; host_tsync(ctx, instance, tsync_port, tsync_protos_reply); } else warning("Failed to negotiate timestamps synchronization with the guest"); } free(tsync_protos_reply); if (use_fifos) { if (nr_cpus != nr_fifos) { warning("number of FIFOs (%d) for guest %s differs " "from number of virtual CPUs (%d)", nr_fifos, instance->name, nr_cpus); nr_cpus = nr_cpus < nr_fifos ? 
nr_cpus : nr_fifos; } free(ports); instance->fds = fds; } else { for (i = 0; i < nr_fifos; i++) close(fds[i]); free(fds); instance->client_ports = ports; } instance->use_fifos = use_fifos; instance->cpu_count = nr_cpus; /* the msg_handle now points to the guest fd */ instance->msg_handle = msg_handle; } static void setup_guest(struct buffer_instance *instance) { struct tracecmd_msg_handle *msg_handle = instance->msg_handle; const char *output_file = instance->output_file; char *file; int fd; /* Create a place to store the guest meta data */ file = trace_get_guest_file(output_file, instance->name); if (!file) die("Failed to allocate memory"); free(instance->output_file); instance->output_file = file; fd = open(file, O_CREAT|O_WRONLY|O_TRUNC, 0644); if (fd < 0) die("Failed to open %s", file); /* Start reading tracing metadata */ if (tracecmd_msg_read_data(msg_handle, fd)) die("Failed receiving metadata"); /* * If connected to a proxy, then it still needs to send * the host / guest timings from its POV. 
*/ if (is_proxy(instance)) instance->proxy_fd = fd; else close(fd); } static void setup_agent(struct buffer_instance *instance, struct common_record_context *ctx) { struct tracecmd_output *network_handle; network_handle = create_net_output(ctx, instance->msg_handle); add_options(network_handle, ctx); tracecmd_write_cmdlines(network_handle); tracecmd_write_cpus(network_handle, instance->cpu_count); tracecmd_write_buffer_info(network_handle); if (instance->msg_handle->flags & TRACECMD_MSG_FL_PROXY) { tracecmd_prepare_options(network_handle, 0, SEEK_CUR); tracecmd_msg_flush_data(instance->msg_handle); } else { tracecmd_write_options(network_handle); tracecmd_write_meta_strings(network_handle); tracecmd_msg_finish_sending_data(instance->msg_handle); } instance->network_handle = network_handle; } static void start_threads(enum trace_type type, struct common_record_context *ctx) { struct buffer_instance *instance; int total_cpu_count = 0; int i = 0; int ret; for_all_instances(instance) { /* Start the connection now to find out how many CPUs we need */ if (is_guest(instance)) connect_to_agent(ctx, instance); total_cpu_count += instance->cpu_count; } /* make a thread for every CPU we have */ pids = calloc(total_cpu_count * (buffers + 1), sizeof(*pids)); if (!pids) die("Failed to allocate pids for %d cpus", total_cpu_count); for_all_instances(instance) { int *brass = NULL; int x, pid; /* May be set by setup_guest() but all others is -1 */ instance->proxy_fd = -1; if (is_agent(instance)) { setup_agent(instance, ctx); } else if (is_guest(instance)) { setup_guest(instance); } else if (host) { instance->msg_handle = setup_connection(instance, ctx); if (!instance->msg_handle) die("Failed to make connection"); } for (x = 0; x < instance->cpu_count; x++) { if (type & TRACE_TYPE_STREAM) { brass = pids[i].brass; ret = pipe(brass); if (ret < 0) die("pipe"); pids[i].stream = trace_stream_init(instance, x, brass[0], instance->cpu_count, hooks, handle_init, ctx->global); if 
(!pids[i].stream) die("Creating stream for %d", i); } else pids[i].brass[0] = -1; pids[i].cpu = x; pids[i].instance = instance; /* Make sure all output is flushed before forking */ fflush(stdout); pid = pids[i++].pid = create_recorder(instance, x, type, brass); if (brass) close(brass[1]); if (pid > 0) add_filter_pid(instance, pid, 1); } } recorder_threads = i; } static void touch_file(const char *file) { int fd; fd = open(file, O_WRONLY | O_CREAT | O_TRUNC, 0644); if (fd < 0) die("could not create file %s\n", file); close(fd); } static void append_buffer(struct tracecmd_output *handle, struct buffer_instance *instance, char **temp_files) { int cpu_count = instance->cpu_count; int i; /* * Since we can record remote and virtual machines in the same file * as the host, the buffers may no longer have matching number of * CPU data as the host. For backward compatibility for older * trace-cmd versions, which will blindly read the number of CPUs * for each buffer instance as there are for the host, if there are * fewer CPUs on the remote machine than on the host, an "empty" * CPU is needed for each CPU that the host has that the remote does * not. If there are more CPUs on the remote, older executables will * simply ignore them (which is OK, we only need to guarantee that * old executables don't crash). 
*/ if (instance->cpu_count < local_cpu_count) cpu_count = local_cpu_count; for (i = 0; i < cpu_count; i++) { temp_files[i] = get_temp_file(instance, i); if (i >= instance->cpu_count) touch_file(temp_files[i]); } tracecmd_append_buffer_cpu_data(handle, tracefs_instance_get_name(instance->tracefs), cpu_count, temp_files); for (i = 0; i < instance->cpu_count; i++) { if (i >= instance->cpu_count) delete_temp_file(instance, i); put_temp_file(temp_files[i]); } } static void add_pid_maps(struct tracecmd_output *handle, struct buffer_instance *instance) { struct pid_addr_maps *maps = instance->pid_maps; struct trace_seq s; int i; trace_seq_init(&s); while (maps) { if (!maps->nr_lib_maps) { maps = maps->next; continue; } trace_seq_reset(&s); trace_seq_printf(&s, "%x %x %s\n", maps->pid, maps->nr_lib_maps, maps->proc_name); for (i = 0; i < maps->nr_lib_maps; i++) trace_seq_printf(&s, "%zx %zx %s\n", maps->lib_maps[i].start, maps->lib_maps[i].end, maps->lib_maps[i].lib_name); trace_seq_terminate(&s); tracecmd_add_option(handle, TRACECMD_OPTION_PROCMAPS, s.len + 1, s.buffer); maps = maps->next; } trace_seq_destroy(&s); } static void add_trace_id(struct tracecmd_output *handle, struct buffer_instance *instance) { tracecmd_add_option(handle, TRACECMD_OPTION_TRACEID, sizeof(long long), &instance->trace_id); } static void add_buffer_stat(struct tracecmd_output *handle, struct buffer_instance *instance) { struct trace_seq s; int i; trace_seq_init(&s); trace_seq_printf(&s, "\nBuffer: %s\n\n", tracefs_instance_get_name(instance->tracefs)); tracecmd_add_option(handle, TRACECMD_OPTION_CPUSTAT, s.len+1, s.buffer); trace_seq_destroy(&s); for (i = 0; i < instance->cpu_count; i++) tracecmd_add_option(handle, TRACECMD_OPTION_CPUSTAT, instance->s_save[i].len+1, instance->s_save[i].buffer); } static void add_option_hooks(struct tracecmd_output *handle) { struct hook_list *hook; int len; for (hook = hooks; hook; hook = hook->next) { len = strlen(hook->hook); tracecmd_add_option(handle, 
TRACECMD_OPTION_HOOK, len + 1, hook->hook); } } static void add_uname(struct tracecmd_output *handle) { struct utsname buf; char *str; int len; int ret; ret = uname(&buf); /* if this fails for some reason, just ignore it */ if (ret < 0) return; len = strlen(buf.sysname) + strlen(buf.nodename) + strlen(buf.release) + strlen(buf.machine) + 4; str = malloc(len); if (!str) return; sprintf(str, "%s %s %s %s", buf.sysname, buf.nodename, buf.release, buf.machine); tracecmd_add_option(handle, TRACECMD_OPTION_UNAME, len, str); free(str); } static void add_version(struct tracecmd_output *handle) { char *str; int len; len = asprintf(&str, "%s %s", VERSION_STRING, VERSION_GIT); if (len < 0) return; tracecmd_add_option(handle, TRACECMD_OPTION_VERSION, len+1, str); free(str); } static void print_stat(struct buffer_instance *instance) { int cpu; if (quiet) return; if (!is_top_instance(instance)) printf("\nBuffer: %s\n\n", tracefs_instance_get_name(instance->tracefs)); for (cpu = 0; cpu < instance->cpu_count; cpu++) trace_seq_do_printf(&instance->s_print[cpu]); } static char *get_trace_clock(bool selected) { struct buffer_instance *instance; for_all_instances(instance) { if (is_guest(instance)) continue; break; } if (selected) return tracefs_get_clock(instance ? instance->tracefs : NULL); else return tracefs_instance_file_read(instance ? instance->tracefs : NULL, "trace_clock", NULL); } enum { DATA_FL_NONE = 0, DATA_FL_DATE = 1, DATA_FL_OFFSET = 2, DATA_FL_GUEST = 4, DATA_FL_PROXY = 8, }; static void add_options(struct tracecmd_output *handle, struct common_record_context *ctx) { int type = 0; char *clocks; if (ctx->date2ts) { if (ctx->data_flags & DATA_FL_DATE) type = TRACECMD_OPTION_DATE; else if (ctx->data_flags & DATA_FL_OFFSET) type = TRACECMD_OPTION_OFFSET; } if (type) tracecmd_add_option(handle, type, strlen(ctx->date2ts)+1, ctx->date2ts); clocks = get_trace_clock(false); tracecmd_add_option(handle, TRACECMD_OPTION_TRACECLOCK, clocks ? 
strlen(clocks)+1 : 0, clocks); add_option_hooks(handle); add_uname(handle); add_version(handle); if (!no_top_instance()) add_trace_id(handle, &top_instance); free(clocks); } static void write_guest_file(struct buffer_instance *instance) { struct tracecmd_output *handle; int cpu_count = instance->cpu_count; char *file; char **temp_files; int i, fd; file = instance->output_file; fd = open(file, O_RDWR); if (fd < 0) die("error opening %s", file); handle = tracecmd_get_output_handle_fd(fd); if (!handle) die("error writing to %s", file); if (instance->flags & BUFFER_FL_TSC2NSEC) tracecmd_set_out_clock(handle, TSCNSEC_CLOCK); temp_files = malloc(sizeof(*temp_files) * cpu_count); if (!temp_files) die("failed to allocate temp_files for %d cpus", cpu_count); for (i = 0; i < cpu_count; i++) { temp_files[i] = get_temp_file(instance, i); if (!temp_files[i]) die("failed to allocate memory"); } if (tracecmd_write_cpu_data(handle, cpu_count, temp_files, NULL) < 0) die("failed to write CPU data"); tracecmd_output_close(handle); for (i = 0; i < cpu_count; i++) put_temp_file(temp_files[i]); free(temp_files); } static struct tracecmd_output *create_output(struct common_record_context *ctx) { struct tracecmd_output *out; if (!ctx->output) return NULL; out = tracecmd_output_create(ctx->output); if (!out) goto error; if (ctx->file_version && tracecmd_output_set_version(out, ctx->file_version)) goto error; if (ctx->compression) { if (tracecmd_output_set_compression(out, ctx->compression)) goto error; } else if (ctx->file_version >= FILE_VERSION_COMPRESSION) { tracecmd_output_set_compression(out, "any"); } if (tracecmd_output_write_headers(out, listed_events)) goto error; return out; error: if (out) tracecmd_output_close(out); unlink(ctx->output); return NULL; } static void record_data(struct common_record_context *ctx) { struct tracecmd_output *handle; struct buffer_instance *instance; bool have_proxy = false; bool local = false; int max_cpu_count = local_cpu_count; char **temp_files; 
int i; for_all_instances(instance) { if (is_guest(instance)) { write_guest_file(instance); if (is_proxy(instance)) have_proxy = true; } else if (host && instance->msg_handle) finish_network(instance->msg_handle); else local = true; } if (!local) return; if (latency) { handle = tracecmd_create_file_latency(ctx->output, local_cpu_count, ctx->file_version, ctx->compression); tracecmd_set_quiet(handle, quiet); } else { if (!local_cpu_count) return; /* Allocate enough temp files to handle each instance */ for_all_instances(instance) { if (instance->msg_handle) continue; if (instance->cpu_count > max_cpu_count) max_cpu_count = instance->cpu_count; } temp_files = malloc(sizeof(*temp_files) * max_cpu_count); if (!temp_files) die("Failed to allocate temp_files for %d cpus", local_cpu_count); for (i = 0; i < max_cpu_count; i++) temp_files[i] = get_temp_file(&top_instance, i); /* * If top_instance was not used, we still need to create * empty trace.dat files for it. */ if (no_top_instance() || top_instance.msg_handle) { for (i = 0; i < local_cpu_count; i++) touch_file(temp_files[i]); } handle = create_output(ctx); if (!handle) die("Error creating output file"); tracecmd_set_quiet(handle, quiet); add_options(handle, ctx); /* * If we connected to a proxy, then it will now send us * the tsync data for our file. 
*/ if (have_proxy) { for_all_instances(instance) { if (!is_proxy(instance)) continue; /* Tell proxy we are ready for the rest */ tracecmd_msg_cont(instance->msg_handle); tracecmd_msg_read_options(instance->msg_handle, handle); tracecmd_msg_wait_close_resp(instance->msg_handle); tracecmd_msg_handle_close(instance->msg_handle); } } /* Only record the top instance under TRACECMD_OPTION_CPUSTAT*/ if (!no_top_instance() && !top_instance.msg_handle) { struct trace_seq *s = top_instance.s_save; for (i = 0; i < local_cpu_count; i++) tracecmd_add_option(handle, TRACECMD_OPTION_CPUSTAT, s[i].len+1, s[i].buffer); } if (buffers) { i = 0; for_each_instance(instance) { int cpus = instance->cpu_count != local_cpu_count ? instance->cpu_count : 0; if (instance->msg_handle) continue; tracecmd_add_buffer_info(handle, tracefs_instance_get_name(instance->tracefs), cpus); add_buffer_stat(handle, instance); } } if (!no_top_instance() && !top_instance.msg_handle) print_stat(&top_instance); for_all_instances(instance) { add_pid_maps(handle, instance); } for_all_instances(instance) { if (is_guest(instance)) trace_add_guest_info(handle, instance); } if (ctx->tsc2nsec.mult) { add_tsc2nsec(handle, &ctx->tsc2nsec); tracecmd_set_out_clock(handle, TSCNSEC_CLOCK); } if (tracecmd_write_cmdlines(handle)) die("Writing cmdlines"); tracecmd_append_cpu_data(handle, local_cpu_count, temp_files); for (i = 0; i < max_cpu_count; i++) put_temp_file(temp_files[i]); if (buffers) { i = 0; for_each_instance(instance) { if (instance->msg_handle) continue; print_stat(instance); append_buffer(handle, instance, temp_files); } } free(temp_files); } if (!handle) die("could not write to file"); tracecmd_output_close(handle); } enum filter_type { FUNC_FILTER, FUNC_NOTRACE, }; static int filter_command(struct tracefs_instance *instance, const char *cmd) { return tracefs_instance_file_append(instance, "set_ftrace_filter", cmd); } static int write_func_filter(enum filter_type type, struct buffer_instance *instance, struct 
func_list **list) { struct func_list *item, *cmds = NULL; const char *file; int ret = -1; int (*filter_function)(struct tracefs_instance *instance, const char *filter, const char *module, unsigned int flags); if (!*list) return 0; switch (type) { case FUNC_FILTER: filter_function = tracefs_function_filter; file = "set_ftrace_filter"; break; case FUNC_NOTRACE: filter_function = tracefs_function_notrace; file = "set_ftrace_notrace"; break; } ret = filter_function(instance->tracefs, NULL, NULL, TRACEFS_FL_RESET | TRACEFS_FL_CONTINUE); if (ret < 0) return ret; while (*list) { item = *list; *list = item->next; /* Do commands separately at the end */ if (type == FUNC_FILTER && strstr(item->func, ":")) { item->next = cmds; cmds = item; continue; } ret = filter_function(instance->tracefs, item->func, item->mod, TRACEFS_FL_CONTINUE); if (ret < 0) goto failed; free(item); } ret = filter_function(instance->tracefs, NULL, NULL, 0); /* Now add any commands */ while (cmds) { item = cmds; cmds = item->next; ret = filter_command(instance->tracefs, item->func); if (ret < 0) goto failed; free(item); } return ret; failed: die("Failed to write %s to %s.\n" "Perhaps this function is not available for tracing.\n" "run 'trace-cmd list -f %s' to see if it is.", item->func, file, item->func); return ret; } static int write_func_file(struct buffer_instance *instance, const char *file, struct func_list **list) { struct func_list *item; const char *prefix = ":mod:"; char *path; int fd; int ret = -1; if (!*list) return 0; path = tracefs_instance_get_file(instance->tracefs, file); fd = open(path, O_WRONLY | O_TRUNC); if (fd < 0) goto free; while (*list) { item = *list; *list = item->next; ret = write(fd, item->func, strlen(item->func)); if (ret < 0) goto failed; if (item->mod) { ret = write(fd, prefix, strlen(prefix)); if (ret < 0) goto failed; ret = write(fd, item->mod, strlen(item->mod)); if (ret < 0) goto failed; } ret = write(fd, " ", 1); if (ret < 0) goto failed; free(item); } close(fd); 
ret = 0; free: tracefs_put_tracing_file(path); return ret; failed: die("Failed to write %s to %s.\n" "Perhaps this function is not available for tracing.\n" "run 'trace-cmd list -f %s' to see if it is.", item->func, file, item->func); return ret; } static int functions_filtered(struct buffer_instance *instance) { char buf[1] = { '#' }; char *path; int fd; path = tracefs_instance_get_file(instance->tracefs, "set_ftrace_filter"); fd = open(path, O_RDONLY); tracefs_put_tracing_file(path); if (fd < 0) { if (is_top_instance(instance)) warning("Can not set set_ftrace_filter"); else warning("Can not set set_ftrace_filter for %s", tracefs_instance_get_name(instance->tracefs)); return 0; } /* * If functions are not filtered, than the first character * will be '#'. Make sure it is not an '#' and also not space. */ read(fd, buf, 1); close(fd); if (buf[0] == '#' || isspace(buf[0])) return 0; return 1; } static void set_funcs(struct buffer_instance *instance) { int set_notrace = 0; int ret; if (is_guest(instance)) return; ret = write_func_filter(FUNC_FILTER, instance, &instance->filter_funcs); if (ret < 0) die("set_ftrace_filter does not exist. Can not filter functions"); /* graph tracing currently only works for top instance */ if (is_top_instance(instance)) { ret = write_func_file(instance, "set_graph_function", &graph_funcs); if (ret < 0) die("set_graph_function does not exist."); if (instance->plugin && strcmp(instance->plugin, "function_graph") == 0) { ret = write_func_file(instance, "set_graph_notrace", &instance->notrace_funcs); if (!ret) set_notrace = 1; } if (!set_notrace) { ret = write_func_filter(FUNC_NOTRACE, instance, &instance->notrace_funcs); if (ret < 0) die("set_ftrace_notrace does not exist. 
Can not filter functions"); } } else write_func_filter(FUNC_NOTRACE, instance, &instance->notrace_funcs); /* make sure we are filtering functions */ if (func_stack && is_top_instance(instance)) { if (!functions_filtered(instance)) die("Function stack trace set, but functions not filtered"); save_option(instance, FUNC_STACK_TRACE); } clear_function_filters = 1; } static void add_func(struct func_list **list, const char *mod, const char *func) { struct func_list *item; item = malloc(sizeof(*item)); if (!item) die("Failed to allocate function descriptor"); item->func = func; item->mod = mod; item->next = *list; *list = item; } static int find_ts(struct tep_event *event, struct tep_record *record, int cpu, void *context) { unsigned long long *ts = (unsigned long long *)context; struct tep_format_field *field; if (!ts) return -1; field = tep_find_field(event, "buf"); if (field && strcmp(STAMP"\n", record->data + field->offset) == 0) { *ts = record->ts; return 1; } return 0; } static unsigned long long find_time_stamp(struct tep_handle *tep, struct tracefs_instance *instance) { unsigned long long ts = 0; if (!tracefs_iterate_raw_events(tep, instance, NULL, 0, find_ts, &ts)) return ts; return 0; } static char *read_top_file(char *file, int *psize) { return tracefs_instance_file_read(top_instance.tracefs, file, psize); } static struct tep_handle *get_ftrace_tep(void) { const char *systems[] = {"ftrace", NULL}; struct tep_handle *tep; char *buf; int size; int ret; tep = tracefs_local_events_system(NULL, systems); if (!tep) return NULL; tep_set_file_bigendian(tep, tracecmd_host_bigendian()); buf = read_top_file("events/header_page", &size); if (!buf) goto error; ret = tep_parse_header_page(tep, buf, size, sizeof(unsigned long)); free(buf); if (ret < 0) goto error; return tep; error: tep_free(tep); return NULL; } /* * Try to write the date into the ftrace buffer and then * read it back, mapping the timestamp to the date. 
*/ static char *get_date_to_ts(void) { struct tep_handle *tep; unsigned long long min = -1ULL; unsigned long long diff; unsigned long long stamp; unsigned long long min_stamp; unsigned long long min_ts; unsigned long long ts; struct timespec start; struct timespec end; char *date2ts = NULL; int tfd; int i; /* Set up a tep to read the raw format */ tep = get_ftrace_tep(); if (!tep) { warning("failed to alloc tep, --date ignored"); return NULL; } tfd = tracefs_instance_file_open(NULL, "trace_marker", O_WRONLY); if (tfd < 0) { warning("Can not open 'trace_marker', --date ignored"); goto out_pevent; } for (i = 0; i < date2ts_tries; i++) { tracecmd_disable_tracing(); clear_trace_instances(); tracecmd_enable_tracing(); clock_gettime(CLOCK_REALTIME, &start); write(tfd, STAMP, 5); clock_gettime(CLOCK_REALTIME, &end); tracecmd_disable_tracing(); ts = find_time_stamp(tep, NULL); if (!ts) continue; diff = (unsigned long long)end.tv_sec * 1000000000LL; diff += (unsigned long long)end.tv_nsec; stamp = diff; diff -= (unsigned long long)start.tv_sec * 1000000000LL; diff -= (unsigned long long)start.tv_nsec; if (diff < min) { min_ts = ts; min_stamp = stamp - diff / 2; min = diff; } } close(tfd); if (min == -1ULL) { warning("Failed to make date offset, --date ignored"); goto out_pevent; } /* 16 hex chars + 0x + \0 */ date2ts = malloc(19); if (!date2ts) goto out_pevent; /* * The difference between the timestamp and the gtod is * stored as an ASCII string in hex. 
*/ diff = min_stamp - min_ts; snprintf(date2ts, 19, "0x%llx", diff/1000); out_pevent: tep_free(tep); return date2ts; } static void set_buffer_size_instance(struct buffer_instance *instance) { int buffer_size = instance->buffer_size; int ret; if (is_guest(instance)) return; if (!buffer_size) return; if (buffer_size < 0) die("buffer size must be positive"); instance->old_buffer_size = tracefs_instance_get_buffer_size(instance->tracefs, 0); ret = tracefs_instance_set_buffer_size(instance->tracefs, buffer_size, -1); if (ret < 0) warning("Can't set buffer size"); } static void set_subbuf_size_instance(struct buffer_instance *instance) { int subbuf_size = instance->subbuf_size; int ret; if (is_guest(instance)) return; if (!subbuf_size) return; if (subbuf_size < 0) die("sub-buffer size must be positive"); instance->old_subbuf_size = tracefs_instance_get_subbuf_size(instance->tracefs); ret = tracefs_instance_set_subbuf_size(instance->tracefs, subbuf_size); if (ret < 0) warning("Can't set sub-buffer size"); } void set_buffer_size(void) { struct buffer_instance *instance; for_all_instances(instance) { set_buffer_size_instance(instance); set_subbuf_size_instance(instance); } } static int process_event_trigger(char *path, struct event_iter *iter) { const char *system = iter->system_dent->d_name; const char *event = iter->event_dent->d_name; struct stat st; char *trigger = NULL; char *file; int ret; path = append_file(path, system); file = append_file(path, event); free(path); ret = stat(file, &st); if (ret < 0 || !S_ISDIR(st.st_mode)) goto out; trigger = append_file(file, "trigger"); ret = stat(trigger, &st); if (ret < 0) goto out; ret = clear_trigger(trigger); out: free(trigger); free(file); return ret; } static void clear_instance_triggers(struct buffer_instance *instance) { enum event_iter_type type; struct event_iter *iter; char *system; char *path; int retry = 0; int ret; path = tracefs_instance_get_file(instance->tracefs, "events"); if (!path) die("malloc"); iter = 
trace_event_iter_alloc(path); system = NULL; while ((type = trace_event_iter_next(iter, path, system))) { if (type == EVENT_ITER_SYSTEM) { system = iter->system_dent->d_name; continue; } ret = process_event_trigger(path, iter); if (ret > 0) retry++; } trace_event_iter_free(iter); if (retry) { int i; /* Order matters for some triggers */ for (i = 0; i < retry; i++) { int tries = 0; iter = trace_event_iter_alloc(path); system = NULL; while ((type = trace_event_iter_next(iter, path, system))) { if (type == EVENT_ITER_SYSTEM) { system = iter->system_dent->d_name; continue; } ret = process_event_trigger(path, iter); if (ret > 0) tries++; } trace_event_iter_free(iter); if (!tries) break; } } tracefs_put_tracing_file(path); } static void process_event_filter(char *path, struct event_iter *iter, enum event_process *processed) { const char *system = iter->system_dent->d_name; const char *event = iter->event_dent->d_name; struct stat st; char *filter = NULL; char *file; int ret; path = append_file(path, system); file = append_file(path, event); free(path); ret = stat(file, &st); if (ret < 0 || !S_ISDIR(st.st_mode)) goto out; filter = append_file(file, "filter"); ret = stat(filter, &st); if (ret < 0) goto out; clear_filter(filter); out: free(filter); free(file); } static void clear_instance_filters(struct buffer_instance *instance) { struct event_iter *iter; char *path; char *system; enum event_iter_type type; enum event_process processed = PROCESSED_NONE; path = tracefs_instance_get_file(instance->tracefs, "events"); if (!path) die("malloc"); iter = trace_event_iter_alloc(path); processed = PROCESSED_NONE; system = NULL; while ((type = trace_event_iter_next(iter, path, system))) { if (type == EVENT_ITER_SYSTEM) { system = iter->system_dent->d_name; continue; } process_event_filter(path, iter, &processed); } trace_event_iter_free(iter); tracefs_put_tracing_file(path); } static void clear_filters(void) { struct buffer_instance *instance; for_all_instances(instance) 
clear_instance_filters(instance); } static void reset_clock(void) { struct buffer_instance *instance; for_all_instances(instance) tracefs_instance_file_write(instance->tracefs, "trace_clock", "local"); } static void reset_cpu_mask(void) { struct buffer_instance *instance; int cpus = tracecmd_count_cpus(); int fullwords = (cpus - 1) / 32; int bits = (cpus - 1) % 32 + 1; int len = (fullwords + 1) * 9; char buf[len + 1]; buf[0] = '\0'; sprintf(buf, "%x", (unsigned int)((1ULL << bits) - 1)); while (fullwords-- > 0) strcat(buf, ",ffffffff"); for_all_instances(instance) tracefs_instance_file_write(instance->tracefs, "tracing_cpumask", buf); } static void reset_event_pid(void) { struct buffer_instance *instance; for_all_instances(instance) add_event_pid(instance, ""); } static void clear_triggers(void) { struct buffer_instance *instance; for_all_instances(instance) clear_instance_triggers(instance); } static void clear_instance_error_log(struct buffer_instance *instance) { char *file; if (!tracefs_file_exists(instance->tracefs, "error_log")) return; file = tracefs_instance_get_file(instance->tracefs, "error_log"); if (!file) return; write_file(file, " "); tracefs_put_tracing_file(file); } static void clear_error_log(void) { struct buffer_instance *instance; for_all_instances(instance) clear_instance_error_log(instance); } static void clear_all_dynamic_events(void) { /* Clear event probes first, as they may be attached to other dynamic event */ tracefs_dynevent_destroy_all(TRACEFS_DYNEVENT_EPROBE, true); tracefs_dynevent_destroy_all(TRACEFS_DYNEVENT_ALL, true); } static void clear_func_filters(void) { struct buffer_instance *instance; char *path; int i; const char * const files[] = { "set_ftrace_filter", "set_ftrace_notrace", "set_graph_function", "set_graph_notrace", NULL }; for_all_instances(instance) { for (i = 0; files[i]; i++) { path = tracefs_instance_get_file(instance->tracefs, files[i]); clear_func_filter(path); tracefs_put_tracing_file(path); } } } static void 
make_instances(void) { struct buffer_instance *instance; for_each_instance(instance) { if (is_guest(instance)) continue; if (instance->name && !instance->tracefs) { instance->tracefs = tracefs_instance_create(instance->name); /* Don't delete instances that already exist */ if (instance->tracefs && !tracefs_instance_is_new(instance->tracefs)) instance->flags |= BUFFER_FL_KEEP; } } } void tracecmd_remove_instances(void) { struct buffer_instance *instance; for_each_instance(instance) { /* Only delete what we created */ if (is_guest(instance) || (instance->flags & BUFFER_FL_KEEP)) continue; if (instance->tracing_on_fd > 0) { close(instance->tracing_on_fd); instance->tracing_on_fd = 0; } tracefs_instance_destroy(instance->tracefs); } } static void check_plugin(const char *plugin) { char *buf; char *str; char *tok; /* * nop is special. We may want to just trace * trace_printks, that are in the kernel. */ if (strcmp(plugin, "nop") == 0) return; buf = read_top_file("available_tracers", NULL); if (!buf) die("No plugins available"); str = buf; while ((tok = strtok(str, " "))) { str = NULL; if (strcmp(tok, plugin) == 0) goto out; } die ("Plugin '%s' does not exist", plugin); out: if (!quiet) fprintf(stderr, " plugin '%s'\n", plugin); free(buf); } static void check_function_plugin(void) { const char *plugin; /* We only care about the top_instance */ if (no_top_instance()) return; plugin = top_instance.plugin; if (!plugin) return; if (plugin && strncmp(plugin, "function", 8) == 0 && func_stack && !top_instance.filter_funcs) die("Must supply function filtering with --func-stack\n"); } static int __check_doing_something(struct buffer_instance *instance) { return is_guest(instance) || (instance->flags & BUFFER_FL_PROFILE) || instance->plugin || instance->events || instance->get_procmap; } static void check_doing_something(void) { struct buffer_instance *instance; for_all_instances(instance) { if (__check_doing_something(instance)) return; } die("no event or plugin was specified... 
aborting"); } static void update_plugin_instance(struct buffer_instance *instance, enum trace_type type) { const char *plugin = instance->plugin; if (is_guest(instance)) return; if (!plugin) return; check_plugin(plugin); /* * Latency tracers just save the trace and kill * the threads. */ if (strcmp(plugin, "irqsoff") == 0 || strcmp(plugin, "preemptoff") == 0 || strcmp(plugin, "preemptirqsoff") == 0 || strcmp(plugin, "wakeup") == 0 || strcmp(plugin, "wakeup_rt") == 0) { latency = 1; if (host) die("Network tracing not available with latency tracer plugins"); if (type & TRACE_TYPE_STREAM) die("Streaming is not available with latency tracer plugins"); } else if (type == TRACE_TYPE_RECORD) { if (latency) die("Can not record latency tracer and non latency trace together"); } if (fset < 0 && (strcmp(plugin, "function") == 0 || strcmp(plugin, "function_graph") == 0)) die("function tracing not configured on this kernel"); if (type != TRACE_TYPE_EXTRACT) set_plugin_instance(instance, plugin); } static void update_plugins(enum trace_type type) { struct buffer_instance *instance; for_all_instances(instance) update_plugin_instance(instance, type); } static void allocate_seq(void) { struct buffer_instance *instance; for_all_instances(instance) { instance->s_save = malloc(sizeof(struct trace_seq) * instance->cpu_count); instance->s_print = malloc(sizeof(struct trace_seq) * instance->cpu_count); if (!instance->s_save || !instance->s_print) die("Failed to allocate instance info"); } } /* Find the overrun output, and add it to the print seq */ static void add_overrun(int cpu, struct trace_seq *src, struct trace_seq *dst) { const char overrun_str[] = "overrun: "; const char commit_overrun_str[] = "commit overrun: "; const char *p; int overrun; int commit_overrun; p = strstr(src->buffer, overrun_str); if (!p) { /* Warn? 
*/ trace_seq_printf(dst, "CPU %d: no overrun found?\n", cpu); return; } overrun = atoi(p + strlen(overrun_str)); p = strstr(p + 9, commit_overrun_str); if (p) commit_overrun = atoi(p + strlen(commit_overrun_str)); else commit_overrun = -1; if (!overrun && !commit_overrun) return; trace_seq_printf(dst, "CPU %d:", cpu); if (overrun) trace_seq_printf(dst, " %d events lost", overrun); if (commit_overrun) trace_seq_printf(dst, " %d events lost due to commit overrun", commit_overrun); trace_seq_putc(dst, '\n'); } static void record_stats(void) { struct buffer_instance *instance; struct trace_seq *s_save; struct trace_seq *s_print; int cpu; for_all_instances(instance) { if (is_guest(instance)) continue; s_save = instance->s_save; s_print = instance->s_print; for (cpu = 0; cpu < instance->cpu_count; cpu++) { trace_seq_init(&s_save[cpu]); trace_seq_init(&s_print[cpu]); trace_seq_printf(&s_save[cpu], "CPU: %d\n", cpu); tracecmd_stat_cpu_instance(instance, &s_save[cpu], cpu); add_overrun(cpu, &s_save[cpu], &s_print[cpu]); } } } static void print_stats(void) { struct buffer_instance *instance; for_all_instances(instance) print_stat(instance); } static void destroy_stats(void) { struct buffer_instance *instance; int cpu; for_all_instances(instance) { if (is_guest(instance)) continue; for (cpu = 0; cpu < instance->cpu_count; cpu++) { trace_seq_destroy(&instance->s_save[cpu]); trace_seq_destroy(&instance->s_print[cpu]); } } } static void list_event(const char *event) { struct tracecmd_event_list *list; list = malloc(sizeof(*list)); if (!list) die("Failed to allocate list for event"); list->next = listed_events; list->glob = event; listed_events = list; } #define ALL_EVENTS "*/*" static void record_all_events(void) { struct tracecmd_event_list *list; while (listed_events) { list = listed_events; listed_events = list->next; free(list); } list = malloc(sizeof(*list)); if (!list) die("Failed to allocate list for all events"); list->next = NULL; list->glob = ALL_EVENTS; listed_events 
= list; } static int recording_all_events(void) { return listed_events && strcmp(listed_events->glob, ALL_EVENTS) == 0; } static void add_trigger(struct event_list *event, const char *trigger) { int ret; if (event->trigger) { event->trigger = realloc(event->trigger, strlen(event->trigger) + strlen("\n") + strlen(trigger) + 1); strcat(event->trigger, "\n"); strcat(event->trigger, trigger); } else { ret = asprintf(&event->trigger, "%s", trigger); if (ret < 0) die("Failed to allocate event trigger"); } } static int test_stacktrace_trigger(struct buffer_instance *instance) { char *path; int ret = 0; int fd; path = tracefs_instance_get_file(instance->tracefs, "events/sched/sched_switch/trigger"); clear_trigger(path); fd = open(path, O_WRONLY); if (fd < 0) goto out; ret = write(fd, "stacktrace", 10); if (ret != 10) ret = 0; else ret = 1; close(fd); out: tracefs_put_tracing_file(path); return ret; } static int profile_add_event(struct buffer_instance *instance, const char *event_str, int stack) { struct event_list *event; char buf[BUFSIZ]; char *p; strcpy(buf, "events/"); strncpy(buf + 7, event_str, BUFSIZ - 7); buf[BUFSIZ-1] = 0; if ((p = strstr(buf, ":"))) { *p = '/'; p++; } if (!trace_check_file_exists(instance, buf)) return -1; /* Only add event if it isn't already added */ for (event = instance->events; event; event = event->next) { if (p && strcmp(event->event, p) == 0) break; if (strcmp(event->event, event_str) == 0) break; } if (!event) { event = malloc(sizeof(*event)); if (!event) die("Failed to allocate event"); memset(event, 0, sizeof(*event)); event->event = event_str; add_event(instance, event); } if (!recording_all_events()) list_event(event_str); if (stack) { if (!event->trigger || !strstr(event->trigger, "stacktrace")) add_trigger(event, "stacktrace"); } return 0; } int tracecmd_add_event(const char *event_str, int stack) { return profile_add_event(first_instance, event_str, stack); } static void enable_profile(struct buffer_instance *instance) { int 
stacktrace = 0; int i; char *trigger_events[] = { "sched:sched_switch", "sched:sched_wakeup", NULL, }; char *events[] = { "exceptions:page_fault_user", "irq:irq_handler_entry", "irq:irq_handler_exit", "irq:softirq_entry", "irq:softirq_exit", "irq:softirq_raise", "sched:sched_process_exec", "raw_syscalls", NULL, }; if (!instance->plugin) { if (trace_check_file_exists(instance, "max_graph_depth")) { instance->plugin = "function_graph"; set_max_graph_depth(instance, "1"); } else warning("Kernel does not support max_graph_depth\n" " Skipping user/kernel profiling"); } if (test_stacktrace_trigger(instance)) stacktrace = 1; else /* * The stacktrace trigger is not implemented with this * kernel, then we need to default to the stack trace option. * This is less efficient but still works. */ save_option(instance, "stacktrace"); for (i = 0; trigger_events[i]; i++) profile_add_event(instance, trigger_events[i], stacktrace); for (i = 0; events[i]; i++) profile_add_event(instance, events[i], 0); } static struct event_list * create_hook_event(struct buffer_instance *instance, const char *system, const char *event) { struct event_list *event_list; char *event_name; int len; if (!system) system = "*"; len = strlen(event); len += strlen(system) + 2; event_name = malloc(len); if (!event_name) die("Failed to allocate %s/%s", system, event); sprintf(event_name, "%s:%s", system, event); event_list = malloc(sizeof(*event_list)); if (!event_list) die("Failed to allocate event list for %s", event_name); memset(event_list, 0, sizeof(*event_list)); event_list->event = event_name; add_event(instance, event_list); list_event(event_name); return event_list; } static void add_hook(struct buffer_instance *instance, const char *arg) { struct event_list *event; struct hook_list *hook; hook = tracecmd_create_event_hook(arg); if (!hook) die("Failed to create event hook %s", arg); hook->instance = instance; hook->next = hooks; hooks = hook; /* Make sure the event is enabled */ event = 
create_hook_event(instance, hook->start_system, hook->start_event); create_hook_event(instance, hook->end_system, hook->end_event); if (hook->stack) { if (!event->trigger || !strstr(event->trigger, "stacktrace")) add_trigger(event, "stacktrace"); } } void update_first_instance(struct buffer_instance *instance, int topt) { if (topt || instance == &top_instance) first_instance = &top_instance; else first_instance = buffer_instances; } void init_top_instance(void) { if (!top_instance.tracefs) top_instance.tracefs = tracefs_instance_create(NULL); top_instance.cpu_count = tracecmd_count_cpus(); top_instance.flags = BUFFER_FL_KEEP; top_instance.trace_id = tracecmd_generate_traceid(); init_instance(&top_instance); } enum { OPT_compression = 237, OPT_file_ver = 238, OPT_verbose = 239, OPT_tsc2nsec = 240, OPT_fork = 241, OPT_tsyncinterval = 242, OPT_user = 243, OPT_procmap = 244, OPT_quiet = 245, OPT_debug = 246, OPT_no_filter = 247, OPT_max_graph_depth = 248, OPT_tsoffset = 249, OPT_bycomm = 250, OPT_stderr = 251, OPT_profile = 252, OPT_nosplice = 253, OPT_funcstack = 254, OPT_date = 255, OPT_module = 256, OPT_nofifos = 257, OPT_cmdlines_size = 258, OPT_poll = 259, OPT_name = 260, OPT_proxy = 261, OPT_temp = 262, OPT_notimeout = 264, OPT_daemonize = 265, OPT_subbuf = 266, }; void trace_stop(int argc, char **argv) { int topt = 0; struct buffer_instance *instance = &top_instance; init_top_instance(); for (;;) { int c; c = getopt(argc-1, argv+1, "hatB:"); if (c == -1) break; switch (c) { case 'h': usage(argv); break; case 'B': instance = allocate_instance(optarg); if (!instance) die("Failed to create instance"); add_instance(instance, local_cpu_count); break; case 'a': add_all_instances(); break; case 't': /* Force to use top instance */ topt = 1; instance = &top_instance; break; default: usage(argv); } } update_first_instance(instance, topt); tracecmd_disable_tracing(); exit(0); } void trace_restart(int argc, char **argv) { int topt = 0; struct buffer_instance *instance = 
&top_instance; init_top_instance(); for (;;) { int c; c = getopt(argc-1, argv+1, "hatB:"); if (c == -1) break; switch (c) { case 'h': usage(argv); break; case 'B': instance = allocate_instance(optarg); if (!instance) die("Failed to create instance"); add_instance(instance, local_cpu_count); break; case 'a': add_all_instances(); break; case 't': /* Force to use top instance */ topt = 1; instance = &top_instance; break; default: usage(argv); } } update_first_instance(instance, topt); tracecmd_enable_tracing(); exit(0); } void trace_reset(int argc, char **argv) { int c; int topt = 0; struct buffer_instance *instance = &top_instance; init_top_instance(); /* if last arg is -a, then -b and -d apply to all instances */ int last_specified_all = 0; struct buffer_instance *inst; /* iterator */ while ((c = getopt(argc-1, argv+1, "hab:B:td")) >= 0) { switch (c) { case 'h': usage(argv); break; case 'b': { int size = atoi(optarg); /* Min buffer size is 1 */ if (size <= 1) size = 1; if (last_specified_all) { for_each_instance(inst) { inst->buffer_size = size; } } else { instance->buffer_size = size; } break; } case 'B': last_specified_all = 0; instance = allocate_instance(optarg); if (!instance) die("Failed to create instance"); add_instance(instance, local_cpu_count); /* -d will remove keep */ instance->flags |= BUFFER_FL_KEEP; break; case 't': /* Force to use top instance */ last_specified_all = 0; topt = 1; instance = &top_instance; break; case 'a': last_specified_all = 1; add_all_instances(); for_each_instance(inst) { inst->flags |= BUFFER_FL_KEEP; } break; case 'd': if (last_specified_all) { for_each_instance(inst) { inst->flags &= ~BUFFER_FL_KEEP; } } else { if (is_top_instance(instance)) die("Can not delete top level buffer"); instance->flags &= ~BUFFER_FL_KEEP; } break; } } update_first_instance(instance, topt); tracecmd_disable_all_tracing(1); set_buffer_size(); clear_filters(); clear_triggers(); clear_all_dynamic_events(); clear_error_log(); /* set clock to "local" */ 
reset_clock(); reset_event_pid(); reset_max_latency_instance(); reset_cpu_mask(); tracecmd_remove_instances(); clear_func_filters(); /* restore tracing_on to 1 */ tracecmd_enable_tracing(); exit(0); } static void init_common_record_context(struct common_record_context *ctx, enum trace_cmd curr_cmd) { memset(ctx, 0, sizeof(*ctx)); ctx->instance = &top_instance; ctx->curr_cmd = curr_cmd; local_cpu_count = tracecmd_count_cpus(); ctx->file_version = tracecmd_default_file_version(); init_top_instance(); } #define IS_EXTRACT(ctx) ((ctx)->curr_cmd == CMD_extract) #define IS_START(ctx) ((ctx)->curr_cmd == CMD_start) #define IS_CMDSET(ctx) ((ctx)->curr_cmd == CMD_set) #define IS_STREAM(ctx) ((ctx)->curr_cmd == CMD_stream) #define IS_PROFILE(ctx) ((ctx)->curr_cmd == CMD_profile) #define IS_RECORD(ctx) ((ctx)->curr_cmd == CMD_record) #define IS_RECORD_AGENT(ctx) ((ctx)->curr_cmd == CMD_record_agent) static void add_argv(struct buffer_instance *instance, char *arg, bool prepend) { instance->argv = realloc(instance->argv, (instance->argc + 1) * sizeof(char *)); if (!instance->argv) die("Can not allocate instance args"); if (prepend) { memmove(instance->argv + 1, instance->argv, instance->argc * sizeof(*instance->argv)); instance->argv[0] = arg; } else { instance->argv[instance->argc] = arg; } instance->argc++; } static void add_arg(struct buffer_instance *instance, int c, const char *opts, struct option *long_options, char *optarg) { char *ptr, *arg; int i, ret; /* Short or long arg */ if (!(c & 0x80)) { ptr = strchr(opts, c); if (!ptr) return; /* Not found? 
*/ ret = asprintf(&arg, "-%c", c); if (ret < 0) die("Can not allocate argument"); add_argv(instance, arg, false); if (ptr[1] == ':') { arg = strdup(optarg); if (!arg) die("Can not allocate arguments"); add_argv(instance, arg, false); } return; } for (i = 0; long_options[i].name; i++) { if (c != long_options[i].val) continue; ret = asprintf(&arg, "--%s", long_options[i].name); if (ret < 0) die("Can not allocate argument"); add_argv(instance, arg, false); if (long_options[i].has_arg) { arg = strdup(optarg); if (!arg) die("Can not allocate arguments"); add_argv(instance, arg, false); } return; } /* Not found? */ } static inline void cmd_check_die(struct common_record_context *ctx, enum trace_cmd id, char *cmd, char *param) { if (ctx->curr_cmd == id) die("%s has no effect with the command %s\n" "Did you mean 'record'?", param, cmd); } static inline void remove_instances(struct buffer_instance *instances) { struct buffer_instance *del; while (instances) { del = instances; instances = instances->next; free(del->name); if (tracefs_instance_is_new(del->tracefs)) tracefs_instance_destroy(del->tracefs); tracefs_instance_free(del->tracefs); free(del); } } static inline void check_instance_die(struct buffer_instance *instance, char *param) { if (instance->delete) die("Instance %s is marked for deletion, invalid option %s", tracefs_instance_get_name(instance->tracefs), param); } static bool clock_is_supported(struct tracefs_instance *instance, const char *clock) { char *all_clocks = NULL; char *ret = NULL; all_clocks = tracefs_instance_file_read(instance, "trace_clock", NULL); if (!all_clocks) return false; ret = strstr(all_clocks, clock); if (ret && (ret == all_clocks || ret[-1] == ' ' || ret[-1] == '[')) { switch (ret[strlen(clock)]) { case ' ': case '\0': case ']': case '\n': break; default: ret = NULL; } } else { ret = NULL; } free(all_clocks); return ret != NULL; } #ifdef PERF static int get_tsc_nsec(int *shift, int *mult) { static int cpu_shift, cpu_mult; static int 
supported; int cpus = tracecmd_count_cpus(); struct trace_perf perf; int i; if (supported) goto out; supported = -1; if (trace_perf_init(&perf, 1, 0, getpid())) return -1; if (trace_perf_open(&perf)) return -1; cpu_shift = perf.mmap->time_shift; cpu_mult = perf.mmap->time_mult; for (i = 1; i < cpus; i++) { trace_perf_close(&perf); if (trace_perf_init(&perf, 1, i, getpid())) break; if (trace_perf_open(&perf)) break; if (perf.mmap->time_shift != cpu_shift || perf.mmap->time_mult != cpu_mult) { warning("Found different TSC multiplier and shift for CPU %d: %d;%d instead of %d;%d", i, perf.mmap->time_mult, perf.mmap->time_shift, cpu_mult, cpu_shift); break; } } trace_perf_close(&perf); if (i < cpus) return -1; if (cpu_shift || cpu_mult) supported = 1; out: if (supported < 0) return -1; if (shift) *shift = cpu_shift; if (mult) *mult = cpu_mult; return 0; } #else static int get_tsc_nsec(int *shift, int *mult) { return -1; } #endif bool trace_tsc2nsec_is_supported(void) { return get_tsc_nsec(NULL, NULL) == 0; } static void parse_record_options(int argc, char **argv, enum trace_cmd curr_cmd, struct common_record_context *ctx) { const char *plugin = NULL; const char *option; struct event_list *event = NULL; struct event_list *last_event = NULL; struct addrinfo *result; char *pids; char *pid; char *sav; int name_counter = 0; int negative = 0; bool is_proxy = false; struct buffer_instance *instance, *del_list = NULL; int do_children = 0; int fpids_count = 0; init_common_record_context(ctx, curr_cmd); if (IS_CMDSET(ctx)) keep = 1; for (;;) { int option_index = 0; int ret; int c; const char *opts; static struct option long_options[] = { {"date", no_argument, NULL, OPT_date}, {"func-stack", no_argument, NULL, OPT_funcstack}, {"nosplice", no_argument, NULL, OPT_nosplice}, {"nofifos", no_argument, NULL, OPT_nofifos}, {"profile", no_argument, NULL, OPT_profile}, {"stderr", no_argument, NULL, OPT_stderr}, {"by-comm", no_argument, NULL, OPT_bycomm}, {"ts-offset", required_argument, 
NULL, OPT_tsoffset}, {"max-graph-depth", required_argument, NULL, OPT_max_graph_depth}, {"cmdlines-size", required_argument, NULL, OPT_cmdlines_size}, {"no-filter", no_argument, NULL, OPT_no_filter}, {"debug", no_argument, NULL, OPT_debug}, {"notimeout", no_argument, NULL, OPT_notimeout}, {"quiet", no_argument, NULL, OPT_quiet}, {"help", no_argument, NULL, '?'}, {"proc-map", no_argument, NULL, OPT_procmap}, {"user", required_argument, NULL, OPT_user}, {"module", required_argument, NULL, OPT_module}, {"tsync-interval", required_argument, NULL, OPT_tsyncinterval}, {"fork", no_argument, NULL, OPT_fork}, {"tsc2nsec", no_argument, NULL, OPT_tsc2nsec}, {"poll", no_argument, NULL, OPT_poll}, {"name", required_argument, NULL, OPT_name}, {"verbose", optional_argument, NULL, OPT_verbose}, {"compression", required_argument, NULL, OPT_compression}, {"file-version", required_argument, NULL, OPT_file_ver}, {"proxy", required_argument, NULL, OPT_proxy}, {"temp", required_argument, NULL, OPT_temp}, {"subbuf-size", required_argument, NULL, OPT_subbuf}, {"daemonize", no_argument, NULL, OPT_daemonize}, {NULL, 0, NULL, 0} }; if (IS_EXTRACT(ctx)) opts = "+haf:Fp:co:O:sr:g:l:n:P:N:tb:B:ksiT"; else opts = "+hae:f:FA:p:cC:dDGo:O:s:r:V:vg:l:n:P:N:tb:R:B:ksSiTm:M:H:q"; c = getopt_long (argc-1, argv+1, opts, long_options, &option_index); if (c == -1) break; /* * If the current instance is to record a guest, then save * all the arguments for this instance. 
*/ if (c != 'B' && (c != 'A' || is_proxy) && c != OPT_name && is_guest(ctx->instance) && c != OPT_proxy) { add_arg(ctx->instance, c, opts, long_options, optarg); if (c == 'C') ctx->instance->flags |= BUFFER_FL_HAS_CLOCK; continue; } switch (c) { case 'h': usage(argv); break; case 'a': cmd_check_die(ctx, CMD_set, *(argv+1), "-a"); if (IS_EXTRACT(ctx)) { add_all_instances(); } else { ctx->record_all = 1; record_all_events(); } break; case 'e': check_instance_die(ctx->instance, "-e"); ctx->events = 1; event = malloc(sizeof(*event)); if (!event) die("Failed to allocate event %s", optarg); memset(event, 0, sizeof(*event)); event->event = optarg; add_event(ctx->instance, event); event->neg = negative; event->filter = NULL; last_event = event; if (!ctx->record_all) list_event(optarg); break; case 'f': if (!last_event) die("filter must come after event"); if (last_event->filter) { last_event->filter = realloc(last_event->filter, strlen(last_event->filter) + strlen("&&()") + strlen(optarg) + 1); strcat(last_event->filter, "&&("); strcat(last_event->filter, optarg); strcat(last_event->filter, ")"); } else { ret = asprintf(&last_event->filter, "(%s)", optarg); if (ret < 0) die("Failed to allocate filter %s", optarg); } break; case 'R': if (!last_event) die("trigger must come after event"); add_trigger(event, optarg); break; case OPT_name: if (!ctx->instance) die("No instance defined for name option\n"); if (!is_guest(ctx->instance)) die(" --name is only used for -A options\n"); free(ctx->instance->name); ctx->instance->name = strdup(optarg); if (!ctx->instance->name) die("Failed to allocate name"); break; case OPT_proxy: is_proxy = true; /* fall through */ case 'A': { char *name = NULL; int cid = -1, port = -1; if (!IS_RECORD(ctx)) die("%s is only allowed for record operations", is_proxy ? 
"--proxy" : "-A"); name = parse_guest_name(optarg, &cid, &port, &result); if (cid == -1 && !result) die("guest %s not found", optarg); if (port == -1) port = TRACE_AGENT_DEFAULT_PORT; if (!name || !*name) { ret = asprintf(&name, "unnamed-%d", name_counter++); if (ret < 0) name = NULL; } else { /* Needs to be allocate */ name = strdup(name); } if (!name) die("Failed to allocate guest name"); ctx->instance = allocate_instance(name); if (!ctx->instance) die("Failed to allocate instance"); if (result) { ctx->instance->flags |= BUFFER_FL_NETWORK; ctx->instance->port_type = USE_TCP; } if (is_proxy) ctx->instance->flags |= BUFFER_FL_PROXY; ctx->instance->flags |= BUFFER_FL_GUEST; ctx->instance->result = result; ctx->instance->cid = cid; ctx->instance->port = port; ctx->instance->name = name; add_instance(ctx->instance, 0); ctx->data_flags |= DATA_FL_GUEST; /* Do not send a clock to a proxy */ if (is_proxy) ctx->instance->flags |= BUFFER_FL_HAS_CLOCK; break; } case 'F': test_set_event_pid(ctx->instance); filter_task = 1; break; case 'G': cmd_check_die(ctx, CMD_set, *(argv+1), "-G"); ctx->global = 1; break; case 'P': check_instance_die(ctx->instance, "-P"); test_set_event_pid(ctx->instance); pids = strdup(optarg); if (!pids) die("strdup"); pid = strtok_r(pids, ",", &sav); while (pid) { fpids_count += add_filter_pid(ctx->instance, atoi(pid), 0); pid = strtok_r(NULL, ",", &sav); ctx->instance->nr_process_pids++; } ctx->instance->process_pids = ctx->instance->filter_pids; free(pids); break; case 'c': check_instance_die(ctx->instance, "-c"); test_set_event_pid(ctx->instance); do_children = 1; if (!ctx->instance->have_event_fork) { #ifdef NO_PTRACE die("-c invalid: ptrace not supported"); #endif do_ptrace = 1; ctx->instance->ptrace_child = 1; } else { save_option(ctx->instance, "event-fork"); } if (ctx->instance->have_func_fork) save_option(ctx->instance, "function-fork"); break; case 'C': check_instance_die(ctx->instance, "-C"); if (strcmp(optarg, TSCNSEC_CLOCK) == 0) { ret = 
get_tsc_nsec(&ctx->tsc2nsec.shift, &ctx->tsc2nsec.mult); if (ret) die("TSC to nanosecond is not supported"); ctx->instance->flags |= BUFFER_FL_TSC2NSEC; ctx->instance->clock = TSC_CLOCK; } else { ctx->instance->clock = optarg; } if (!clock_is_supported(NULL, ctx->instance->clock)) die("Clock %s is not supported", ctx->instance->clock); ctx->instance->clock = strdup(ctx->instance->clock); if (!ctx->instance->clock) die("Failed allocation"); ctx->instance->flags |= BUFFER_FL_HAS_CLOCK; if (!ctx->clock && !is_guest(ctx->instance)) ctx->clock = ctx->instance->clock; break; case 'v': negative = 1; break; case 'l': add_func(&ctx->instance->filter_funcs, ctx->instance->filter_mod, optarg); ctx->filtered = 1; break; case 'n': check_instance_die(ctx->instance, "-n"); add_func(&ctx->instance->notrace_funcs, ctx->instance->filter_mod, optarg); ctx->filtered = 1; break; case 'g': check_instance_die(ctx->instance, "-g"); add_func(&graph_funcs, ctx->instance->filter_mod, optarg); ctx->filtered = 1; break; case 'p': check_instance_die(ctx->instance, "-p"); if (ctx->instance->plugin) die("only one plugin allowed"); for (plugin = optarg; isspace(*plugin); plugin++) ; ctx->instance->plugin = plugin; for (optarg += strlen(optarg) - 1; optarg > plugin && isspace(*optarg); optarg--) ; optarg++; optarg[0] = '\0'; break; case 'D': ctx->total_disable = 1; /* fall through */ case 'd': ctx->disable = 1; break; case 'o': cmd_check_die(ctx, CMD_set, *(argv+1), "-o"); if (IS_RECORD_AGENT(ctx)) die("-o incompatible with agent recording"); if (host) die("-o incompatible with -N"); if (IS_START(ctx)) die("start does not take output\n" "Did you mean 'record'?"); if (IS_STREAM(ctx)) die("stream does not take output\n" "Did you mean 'record'?"); if (ctx->output) die("only one output file allowed"); ctx->output = optarg; if (IS_PROFILE(ctx)) { int fd; /* pipe the output to this file instead of stdout */ save_stdout = dup(1); close(1); fd = open(optarg, O_WRONLY | O_CREAT | O_TRUNC, 0644); if (fd < 0) 
die("can't write to %s", optarg); if (fd != 1) { dup2(fd, 1); close(fd); } } break; case OPT_temp: if (ctx->temp) die("Only one temp directory can be listed"); ctx->temp = optarg; break; case 'O': check_instance_die(ctx->instance, "-O"); option = optarg; save_option(ctx->instance, option); break; case 'T': check_instance_die(ctx->instance, "-T"); save_option(ctx->instance, "stacktrace"); break; case 'H': cmd_check_die(ctx, CMD_set, *(argv+1), "-H"); check_instance_die(ctx->instance, "-H"); add_hook(ctx->instance, optarg); ctx->events = 1; break; case 's': cmd_check_die(ctx, CMD_set, *(argv+1), "-s"); if (IS_EXTRACT(ctx)) { if (optarg) usage(argv); recorder_flags |= TRACECMD_RECORD_SNAPSHOT; break; } if (!optarg) usage(argv); sleep_time = atoi(optarg); break; case 'S': cmd_check_die(ctx, CMD_set, *(argv+1), "-S"); ctx->manual = 1; /* User sets events for profiling */ if (!event) ctx->events = 0; break; case 'r': cmd_check_die(ctx, CMD_set, *(argv+1), "-r"); rt_prio = atoi(optarg); break; case 'N': cmd_check_die(ctx, CMD_set, *(argv+1), "-N"); if (!IS_RECORD(ctx)) die("-N only available with record"); if (IS_RECORD_AGENT(ctx)) die("-N incompatible with agent recording"); if (ctx->output) die("-N incompatible with -o"); host = optarg; break; case 'V': cmd_check_die(ctx, CMD_set, *(argv+1), "-V"); if (!IS_RECORD(ctx)) die("-V only available with record"); if (IS_RECORD_AGENT(ctx)) die("-V incompatible with agent recording"); if (ctx->output) die("-V incompatible with -o"); host = optarg; ctx->instance->port_type = USE_VSOCK; break; case 'm': if (max_kb) die("-m can only be specified once"); if (!IS_RECORD(ctx)) die("only record take 'm' option"); max_kb = atoi(optarg); break; case 'M': check_instance_die(ctx->instance, "-M"); ctx->instance->cpumask = alloc_mask_from_hex(ctx->instance, optarg); break; case 't': cmd_check_die(ctx, CMD_set, *(argv+1), "-t"); if (IS_EXTRACT(ctx)) ctx->topt = 1; /* Extract top instance also */ else ctx->instance->port_type = USE_TCP; break; 
case 'b': check_instance_die(ctx->instance, "-b"); ctx->instance->buffer_size = atoi(optarg); break; case 'B': /* Turn off proxy for the next options */ is_proxy = false; ctx->instance = allocate_instance(optarg); if (!ctx->instance) die("Failed to create instance"); if (IS_CMDSET(ctx)) ctx->instance->delete = negative; negative = 0; if (ctx->instance->delete) { ctx->instance->next = del_list; del_list = ctx->instance; } else add_instance(ctx->instance, local_cpu_count); if (IS_PROFILE(ctx)) ctx->instance->flags |= BUFFER_FL_PROFILE; break; case 'k': cmd_check_die(ctx, CMD_set, *(argv+1), "-k"); keep = 1; break; case 'i': ignore_event_not_found = 1; break; case OPT_user: ctx->user = strdup(optarg); if (!ctx->user) die("Failed to allocate user name"); break; case OPT_procmap: cmd_check_die(ctx, CMD_start, *(argv+1), "--proc-map"); cmd_check_die(ctx, CMD_set, *(argv+1), "--proc-map"); check_instance_die(ctx->instance, "--proc-map"); ctx->instance->get_procmap = 1; break; case OPT_date: cmd_check_die(ctx, CMD_set, *(argv+1), "--date"); ctx->date = 1; if (ctx->data_flags & DATA_FL_OFFSET) die("Can not use both --date and --ts-offset"); ctx->data_flags |= DATA_FL_DATE; break; case OPT_funcstack: func_stack = 1; break; case OPT_nosplice: cmd_check_die(ctx, CMD_set, *(argv+1), "--nosplice"); recorder_flags |= TRACECMD_RECORD_NOSPLICE; break; case OPT_nofifos: cmd_check_die(ctx, CMD_set, *(argv+1), "--nofifos"); no_fifos = true; break; case OPT_profile: cmd_check_die(ctx, CMD_set, *(argv+1), "--profile"); check_instance_die(ctx->instance, "--profile"); handle_init = trace_init_profile; ctx->instance->flags |= BUFFER_FL_PROFILE; ctx->events = 1; break; case OPT_stderr: /* if -o was used (for profile), ignore this */ if (save_stdout >= 0) break; save_stdout = dup(1); close(1); dup2(2, 1); break; case OPT_bycomm: cmd_check_die(ctx, CMD_set, *(argv+1), "--by-comm"); trace_profile_set_merge_like_comms(); break; case OPT_tsoffset: cmd_check_die(ctx, CMD_set, *(argv+1), 
"--ts-offset"); ctx->date2ts = strdup(optarg); if (ctx->data_flags & DATA_FL_DATE) die("Can not use both --date and --ts-offset"); ctx->data_flags |= DATA_FL_OFFSET; break; case OPT_max_graph_depth: check_instance_die(ctx->instance, "--max-graph-depth"); free(ctx->instance->max_graph_depth); ctx->instance->max_graph_depth = strdup(optarg); if (!ctx->instance->max_graph_depth) die("Could not allocate option"); break; case OPT_cmdlines_size: ctx->saved_cmdlines_size = atoi(optarg); break; case OPT_no_filter: cmd_check_die(ctx, CMD_set, *(argv+1), "--no-filter"); no_filter = true; break; case OPT_debug: tracecmd_set_debug(true); break; case OPT_notimeout: tracecmd_set_notimeout(true); break; case OPT_module: check_instance_die(ctx->instance, "--module"); if (ctx->instance->filter_mod) add_func(&ctx->instance->filter_funcs, ctx->instance->filter_mod, "*"); ctx->instance->filter_mod = optarg; ctx->filtered = 0; break; case OPT_tsyncinterval: cmd_check_die(ctx, CMD_set, *(argv+1), "--tsync-interval"); ctx->tsync_loop_interval = atoi(optarg); break; case OPT_fork: if (!IS_START(ctx)) die("--fork option used for 'start' command only"); fork_process = true; break; case OPT_daemonize: if (!IS_RECORD(ctx)) die("--daemonize option used for 'record' command only"); do_daemonize = true; break; case OPT_tsc2nsec: ret = get_tsc_nsec(&ctx->tsc2nsec.shift, &ctx->tsc2nsec.mult); if (ret) die("TSC to nanosecond is not supported"); ctx->instance->flags |= BUFFER_FL_TSC2NSEC; break; case OPT_subbuf: check_instance_die(ctx->instance, "--subbuf-size"); ctx->instance->subbuf_size = atoi(optarg); break; case OPT_poll: cmd_check_die(ctx, CMD_set, *(argv+1), "--poll"); recorder_flags |= TRACECMD_RECORD_POLL; break; case OPT_compression: cmd_check_die(ctx, CMD_start, *(argv+1), "--compression"); cmd_check_die(ctx, CMD_set, *(argv+1), "--compression"); cmd_check_die(ctx, CMD_stream, *(argv+1), "--compression"); cmd_check_die(ctx, CMD_profile, *(argv+1), "--compression"); if (strcmp(optarg, 
"any") && strcmp(optarg, "none") && !tracecmd_compress_is_supported(optarg, NULL)) die("Compression algorithm %s is not supported", optarg); ctx->compression = strdup(optarg); break; case OPT_file_ver: if (ctx->curr_cmd != CMD_record && ctx->curr_cmd != CMD_record_agent) die("--file_version has no effect with the command %s\n", *(argv+1)); ctx->file_version = atoi(optarg); if (ctx->file_version < FILE_VERSION_MIN || ctx->file_version > FILE_VERSION_MAX) die("Unsupported file version %d, " "supported versions are from %d to %d", ctx->file_version, FILE_VERSION_MIN, FILE_VERSION_MAX); break; case OPT_quiet: case 'q': quiet = true; break; case OPT_verbose: if (trace_set_verbose(optarg) < 0) die("invalid verbose level %s", optarg); break; default: usage(argv); } } remove_instances(del_list); /* If --date is specified, prepend it to all guest VM flags */ if (ctx->date) { struct buffer_instance *instance; for_all_instances(instance) { if (is_guest(instance)) add_argv(instance, "--date", true); } } if (!ctx->filtered && ctx->instance->filter_mod) add_func(&ctx->instance->filter_funcs, ctx->instance->filter_mod, "*"); if (do_children && !filter_task && !fpids_count) die(" -c can only be used with -F (or -P with event-fork support)"); if ((argc - optind) >= 2) { if (IS_EXTRACT(ctx)) die("Command extract does not take any commands\n" "Did you mean 'record'?"); ctx->run_command = 1; } if (ctx->user && !ctx->run_command) warning("--user %s is ignored, no command is specified", ctx->user); if (top_instance.get_procmap) { /* use ptrace to get procmap on the command exit */ if (ctx->run_command) { do_ptrace = 1; } else if (!top_instance.nr_filter_pids) { warning("--proc-map is ignored for top instance, " "no command or filtered PIDs are specified."); top_instance.get_procmap = 0; } } for_all_instances(instance) { if (instance->get_procmap && !instance->nr_filter_pids) { warning("--proc-map is ignored for instance %s, " "no filtered PIDs are specified.", 
tracefs_instance_get_name(instance->tracefs));
			instance->get_procmap = 0;
		}
	}
}

/*
 * get_trace_cmd_type - map a trace-cmd command to its trace type
 * @cmd: The command being executed
 *
 * Looks @cmd up in a static table and returns the matching trace type.
 * Both CMD_record and CMD_record_agent map to TRACE_TYPE_RECORD, and both
 * CMD_stream and CMD_profile map to TRACE_TYPE_STREAM.
 *
 * Calls die() if @cmd is not in the table.
 */
static enum trace_type get_trace_cmd_type(enum trace_cmd cmd)
{
	const static struct {
		enum trace_cmd cmd;
		enum trace_type ttype;
	} trace_type_per_command[] = {
		{CMD_record, TRACE_TYPE_RECORD},
		{CMD_stream, TRACE_TYPE_STREAM},
		{CMD_extract, TRACE_TYPE_EXTRACT},
		{CMD_profile, TRACE_TYPE_STREAM},
		{CMD_start, TRACE_TYPE_START},
		{CMD_record_agent, TRACE_TYPE_RECORD},
		{CMD_set, TRACE_TYPE_SET}
	};

	for (int i = 0; i < ARRAY_SIZE(trace_type_per_command); i++) {
		if (trace_type_per_command[i].cmd == cmd)
			return trace_type_per_command[i].ttype;
	}

	/* There is no return after this; presumably die() never returns. */
	die("Trace type UNKNOWN for the given cmd_fun");
}

/*
 * finalize_record_trace - restore the tracing state after a session
 * @ctx: The context of the record session that just finished
 *
 * Does nothing when the global "keep" flag is set. Otherwise it resets
 * files, triggers, optionally the function filters, sets the "nop" plugin,
 * removes the instances, and then closes out the per-instance network
 * state.
 */
static void finalize_record_trace(struct common_record_context *ctx)
{
	struct buffer_instance *instance;

	if (keep)
		return;

	update_reset_files();
	update_reset_triggers();
	if (clear_function_filters)
		clear_func_filters();

	set_plugin("nop");

	tracecmd_remove_instances();

	/* If tracing_on was enabled before we started, set it on now */
	for_all_instances(instance) {
		if (instance->flags & BUFFER_FL_KEEP)
			write_tracing_on(instance,
					 instance->tracing_on_init_val);
		/*
		 * NOTE(review): the condition tests the loop's "instance" but
		 * the body operates on ctx->instance. Confirm this is intended
		 * when more than one instance is a proxy server.
		 */
		if (is_proxy_server(instance) && instance->network_handle) {
			/* Now wait for the recorder to be ready for us to send more */
			tracecmd_msg_wait(ctx->instance->msg_handle);
			if (ctx->tsc2nsec.mult)
				add_tsc2nsec(ctx->instance->network_handle, &ctx->tsc2nsec);
			tracecmd_write_guest_time_shift(ctx->instance->network_handle,
							ctx->instance->tsync);
			tracecmd_msg_send_options(ctx->instance->msg_handle,
						  ctx->instance->network_handle);
		}
		if (is_agent(instance)) {
			tracecmd_msg_send_close_resp_msg(instance->msg_handle);
			tracecmd_output_close(instance->network_handle);
		}
	}

	/* A remote host was given (-N or -V): close its network handle too */
	if (host)
		tracecmd_output_close(ctx->instance->network_handle);
}

/*
 * has_local_instances - check if any instance records locally
 *
 * Returns true as soon as it finds an instance that is neither a guest
 * nor, when a remote host is set, an instance with a message handle.
 * Returns false if every instance is skipped.
 */
static bool has_local_instances(void)
{
	struct buffer_instance *instance;

	for_all_instances(instance) {
		if (is_guest(instance))
			continue;
		if (host && instance->msg_handle)
			continue;
		return true;
	}
	return false;
}

/*
 * set_tsync_params - choose the clock and time sync settings of a session
 * @ctx: The record context
 *
 * Picks one clock for the session and propagates it, together with the
 * time sync loop interval, to the instances. The local copy of the clock
 * name is freed before returning.
 */
static void
set_tsync_params(struct common_record_context *ctx)
{
	struct buffer_instance *instance;
	int shift, mult;
	bool force_tsc = false;
	char *clock = NULL;

	if (!ctx->clock) {
	/*
	 * If no clock is configured &&
	 * KVM time sync protocol is available &&
	 * there is information of each guest PID process &&
	 * tsc-x86 clock is supported &&
	 * TSC to nsec multiplier and shift are available:
	 * force using the x86-tsc clock for this host-guest tracing session
	 * and store TSC to nsec multiplier and shift.
	 */
		if (tsync_proto_is_supported("kvm") &&
		    trace_have_guests_pid() &&
		    clock_is_supported(NULL, TSC_CLOCK) &&
		    !get_tsc_nsec(&shift, &mult) && mult) {
			clock = strdup(TSC_CLOCK);
			if (!clock)
				die("Cannot not allocate clock");
			ctx->tsc2nsec.mult = mult;
			ctx->tsc2nsec.shift = shift;
			force_tsc = true;
		} else { /* Use the current clock of the first host instance */
			clock = get_trace_clock(true);
		}
	} else {
		/* The user configured a clock: work on a private copy of it */
		clock = strdup(ctx->clock);
		if (!clock)
			die("Cannot not allocate clock");
	}

	/* Nothing to propagate: no clock and no sync interval */
	if (!clock && !ctx->tsync_loop_interval)
		goto out;
	for_all_instances(instance) {
		if (clock && !(instance->flags & BUFFER_FL_HAS_CLOCK)) {
			/* use the same clock in all tracing peers */
			if (is_guest(instance)) {
				if (!instance->clock) {
					instance->clock = strdup(clock);
					if (!instance->clock)
						die("Can not allocate instance clock");
				}
				/*
				 * The value is added before "-C"; the third
				 * argument presumably requests prepending, so
				 * the guest sees "-C <clock>" -- TODO confirm.
				 */
				add_argv(instance, (char *)instance->clock, true);
				add_argv(instance, "-C", true);
				if (ctx->tsc2nsec.mult)
					instance->flags |= BUFFER_FL_TSC2NSEC;
			} else if (force_tsc && !instance->clock) {
				instance->clock = strdup(clock);
				if (!instance->clock)
					die("Can not allocate instance clock");
			}
		}
		instance->tsync_loop_interval = ctx->tsync_loop_interval;
	}
out:
	free(clock);
}

/*
 * record_trace - run a tracing session described by @ctx
 * @argc: The number of elements in @argv
 * @argv: The command line; only read when a command has to be executed
 * @ctx: The parsed record options
 *
 * Sets up all instances, enables tracing, then either runs the given
 * command, waits for the agent's peer to close, or waits for the traced
 * processes or a signal. Afterwards it stops tracing, writes the data or
 * prints the stats, and restores the tracing state.
 *
 * For the trace types TRACE_TYPE_START and TRACE_TYPE_SET without a
 * command to run, this function exits the process right after tracing
 * is configured.
 */
static void record_trace(int argc, char **argv,
			 struct common_record_context *ctx)
{
	enum trace_type type = get_trace_cmd_type(ctx->curr_cmd);
	struct buffer_instance *instance;
	struct filter_pids *pid;

	if (do_daemonize)
		daemonize_start();

	/*
	 * If top_instance doesn't have any plugins or events, then
	 * remove it from being processed.
	 */
	if (!__check_doing_something(&top_instance) && !filter_task)
		first_instance = buffer_instances;
	else
		ctx->topt = 1;

	update_first_instance(ctx->instance, ctx->topt);
	if (!IS_CMDSET(ctx)) {
		check_doing_something();
		check_function_plugin();
	}

	if (!ctx->output)
		ctx->output = DEFAULT_INPUT_FILE;

	/* Make sure top_instance.output_file exists */
	if (!top_instance.output_file)
		top_instance.output_file = strdup(ctx->output);

	if (ctx->data_flags & (DATA_FL_GUEST | DATA_FL_PROXY))
		set_tsync_params(ctx);

	make_instances();

	/* Save the state of tracing_on before starting */
	for_all_instances(instance) {
		if (ctx->temp)
			instance->temp_dir = ctx->temp;
		/* The -o could have been done after -B */
		if (!instance->output_file)
			instance->output_file = strdup(ctx->output);
		if (!instance->output_file)
			die("Failed to allocate output file name for instance");
		if (!ctx->manual && instance->flags & BUFFER_FL_PROFILE)
			enable_profile(instance);

		instance->tracing_on_init_val = read_tracing_on(instance);
		/* Some instances may not be created yet */
		if (instance->tracing_on_init_val < 0)
			instance->tracing_on_init_val = 1;
	}

	if (ctx->events)
		expand_event_list();

	page_size = getpagesize();

	if (!is_guest(ctx->instance))
		fset = set_ftrace(ctx->instance, !ctx->disable, ctx->total_disable);
	if (!IS_CMDSET(ctx))
		tracecmd_disable_all_tracing(1);

	for_all_instances(instance)
		set_clock(ctx, instance);

	/* Record records the date first */
	if (ctx->date &&
	    ((IS_RECORD(ctx) && has_local_instances()) || IS_RECORD_AGENT(ctx)))
		ctx->date2ts = get_date_to_ts();

	for_all_instances(instance) {
		set_funcs(instance);
		set_mask(instance);
	}

	if (ctx->events) {
		for_all_instances(instance)
			enable_events(instance);
	}

	set_saved_cmdlines_size(ctx);
	set_buffer_size();
	update_plugins(type);
	set_options();

	/* Apply each pending max graph depth, then drop the saved string */
	for_all_instances(instance) {
		if (instance->max_graph_depth) {
			set_max_graph_depth(instance, instance->max_graph_depth);
			free(instance->max_graph_depth);
			instance->max_graph_depth = NULL;
		}
	}

	allocate_seq();

	/* Only record and stream sessions install signal handlers and threads */
	if (type & (TRACE_TYPE_RECORD | TRACE_TYPE_STREAM)) {
		signal(SIGINT, do_sig);
		signal(SIGTERM, do_sig);
		if (!latency)
			start_threads(type, ctx);
	}

	if (ctx->run_command) {
		/* The command to run starts one argument after optind */
		run_cmd(type, ctx->user, (argc - optind) - 1, &argv[optind + 1]);
	} else if (ctx->instance && is_agent(ctx->instance)) {
		update_task_filter();
		tracecmd_enable_tracing();
		tracecmd_msg_wait_close(ctx->instance->msg_handle);
	} else {
		bool pwait = false;
		bool wait_indefinitely = false;

		update_task_filter();

		if (!IS_CMDSET(ctx))
			tracecmd_enable_tracing();

		/* "start" and "set" only configure tracing: we are done */
		if (type & (TRACE_TYPE_START | TRACE_TYPE_SET))
			exit(0);

		/* We don't ptrace ourself */
		if (do_ptrace) {
			for_all_instances(instance) {
				for (pid = instance->filter_pids; pid; pid = pid->next) {
					if (!pid->exclude && instance->ptrace_child) {
						ptrace_attach(instance, pid->pid);
						pwait = true;
					}
				}
			}
		}

		if (do_daemonize) {
			daemonize_finish();
			printf("Send SIGINT/SIGTERM to pid %d to stop recording\n", getpid());
		} else {
			/* sleep till we are woken with Ctrl^C */
			printf("Hit Ctrl^C to stop recording\n");
		}

		for_all_instances(instance) {
			/*
			 * If an instance is not tracing individual processes
			 * or there is an error while waiting for a process to
			 * exit, fallback to waiting indefinitely.
			 */
			if (!instance->nr_process_pids ||
			    trace_wait_for_processes(instance))
				wait_indefinitely = true;
		}
		while (!finished && wait_indefinitely)
			trace_or_sleep(type, pwait);
		/* Streams need to be flushed one more time */
		if (type & TRACE_TYPE_STREAM)
			trace_stream_read(pids, recorder_threads, -1);
	}

	tell_guests_to_stop(ctx);
	tracecmd_disable_tracing();
	if (!latency)
		stop_threads(type);

	record_stats();

	if (!latency)
		wait_threads();

	/* A proxy server finishes the data it forwards over the network */
	if (is_proxy_server(ctx->instance) && ctx->instance->network_handle) {
		tracecmd_tsync_with_guest_stop(ctx->instance->tsync);
		trace_add_guest_info(ctx->instance->network_handle, ctx->instance);
		if (ctx->tsc2nsec.mult)
			add_tsc2nsec(ctx->instance->network_handle, &ctx->tsc2nsec);
		tracecmd_write_options(ctx->instance->network_handle);
		tracecmd_write_meta_strings(ctx->instance->network_handle);
		tracecmd_msg_finish_sending_data(ctx->instance->msg_handle);
	}

	if (IS_RECORD(ctx)) {
		record_data(ctx);
		delete_thread_data();
	} else
		print_stats();

	if (!keep)
		tracecmd_disable_all_tracing(0);

	destroy_stats();
	finalize_record_trace(ctx);

	if (created_pidfile)
		remove_pid_file(RECORD_PIDFILE);
}

/*
 * This function contains common code for the following commands:
 * record, start, stream, profile.
*/ static void record_trace_command(int argc, char **argv, struct common_record_context *ctx) { tracecmd_tsync_init(); record_trace(argc, argv, ctx); } void trace_start(int argc, char **argv) { struct common_record_context ctx; parse_record_options(argc, argv, CMD_start, &ctx); record_trace_command(argc, argv, &ctx); exit(0); } void trace_set(int argc, char **argv) { struct common_record_context ctx; parse_record_options(argc, argv, CMD_set, &ctx); record_trace_command(argc, argv, &ctx); exit(0); } void trace_extract(int argc, char **argv) { struct common_record_context ctx; struct buffer_instance *instance; enum trace_type type; parse_record_options(argc, argv, CMD_extract, &ctx); type = get_trace_cmd_type(ctx.curr_cmd); update_first_instance(ctx.instance, ctx.topt); check_function_plugin(); if (!ctx.output) ctx.output = DEFAULT_INPUT_FILE; /* Save the state of tracing_on before starting */ for_all_instances(instance) { instance->output_file = strdup(ctx.output); if (!instance->output_file) die("Failed to allocate output file name for instance"); if (!ctx.manual && instance->flags & BUFFER_FL_PROFILE) enable_profile(ctx.instance); instance->tracing_on_init_val = read_tracing_on(instance); /* Some instances may not be created yet */ if (instance->tracing_on_init_val < 0) instance->tracing_on_init_val = 1; } /* Extracting data records all events in the system. */ if (!ctx.record_all) record_all_events(); if (ctx.events) expand_event_list(); page_size = getpagesize(); update_plugins(type); set_options(); for_all_instances(instance) { if (instance->max_graph_depth) { set_max_graph_depth(instance, instance->max_graph_depth); free(instance->max_graph_depth); instance->max_graph_depth = NULL; } } allocate_seq(); flush_threads(); record_stats(); if (!keep) tracecmd_disable_all_tracing(0); /* extract records the date after extraction */ if (ctx.date) { /* * We need to start tracing, don't let other traces * screw with our trace_marker. 
*/ tracecmd_disable_all_tracing(1); ctx.date2ts = get_date_to_ts(); } record_data(&ctx); delete_thread_data(); destroy_stats(); finalize_record_trace(&ctx); exit(0); } void trace_stream(int argc, char **argv) { struct common_record_context ctx; /* Default sleep time is half a second for streaming */ sleep_time = 500000; parse_record_options(argc, argv, CMD_stream, &ctx); record_trace_command(argc, argv, &ctx); exit(0); } void trace_profile(int argc, char **argv) { struct common_record_context ctx; parse_record_options(argc, argv, CMD_profile, &ctx); handle_init = trace_init_profile; ctx.events = 1; /* * If no instances were set, then enable profiling on the top instance. */ if (!buffer_instances) top_instance.flags |= BUFFER_FL_PROFILE; record_trace_command(argc, argv, &ctx); do_trace_profile(); exit(0); } void trace_record(int argc, char **argv) { struct common_record_context ctx; parse_record_options(argc, argv, CMD_record, &ctx); record_trace_command(argc, argv, &ctx); exit(0); } /** * trace_record_agent - record command running from the agent * @msg_handle: The handle to communicate with the peer * @cpus: The number of CPUs the agent has to record * @fds: The array of file descriptors for the CPUs * @argc: The number of arguments to pass to the record session * @argv: The arguments to pass to the record session * @use_fifos: True if fifos are used instead of sockets. * @trace_id: The agent's trace_id * @rcid: Remote cid if the agent is a proxy, negative otherwise. * @host: Set if this is an IP connection and not a vsocket one * * This is used to enable tracing via the record command just * like trace-cmd record, but it is being done via the agent * and all the data is being transfered to the peer that is * connected on the other end of the sockets. * * Returns zero on success, negative otherwise. 
*/ int trace_record_agent(struct tracecmd_msg_handle *msg_handle, int cpus, int *fds, int argc, char **argv, bool use_fifos, struct tracecmd_time_sync *tsync, unsigned long long trace_id, int rcid, const char *host) { struct common_record_context ctx; char **argv_plus; /* Reset optind for getopt_long */ optind = 1; /* * argc is the number of elements in argv, but we need to convert * argc and argv into "trace-cmd", "record", argv. * where argc needs to grow by two. */ argv_plus = calloc(argc + 2, sizeof(char *)); if (!argv_plus) die("Failed to allocate record arguments"); argv_plus[0] = "trace-cmd"; argv_plus[1] = "record"; memmove(argv_plus + 2, argv, argc * sizeof(char *)); argc += 2; parse_record_options(argc, argv_plus, CMD_record_agent, &ctx); if (ctx.run_command) return -EINVAL; ctx.instance->fds = fds; ctx.instance->use_fifos = use_fifos; ctx.instance->flags |= BUFFER_FL_AGENT; if (rcid >= 0) ctx.data_flags |= DATA_FL_PROXY; ctx.instance->msg_handle = msg_handle; ctx.instance->host = host; ctx.instance->tsync = tsync; ctx.instance->cid = rcid; msg_handle->version = V3_PROTOCOL; top_instance.trace_id = trace_id; record_trace(argc, argv, &ctx); free(argv_plus); return 0; } trace-cmd-v3.3.1/tracecmd/trace-restore.c000066400000000000000000000070121470231550600202530ustar00rootroot00000000000000// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "trace-local.h" static struct tracecmd_output *create_output(const char *file, const char *tracing_dir, const char *kallsyms) { struct tracecmd_output *out; out = tracecmd_output_create(file); if (!out) goto error; if (tracing_dir && tracecmd_output_set_trace_dir(out, tracing_dir)) goto error; if (kallsyms && tracecmd_output_set_kallsyms(out, kallsyms)) goto error; if (tracecmd_output_write_headers(out, NULL)) goto error; return out; 
error: if (out) tracecmd_output_close(out); unlink(file); return NULL; } void trace_restore (int argc, char **argv) { struct tracecmd_output *handle; const char *output_file = DEFAULT_INPUT_FILE; const char *output = NULL; const char *input = NULL; const char *tracing_dir = NULL; const char *kallsyms = NULL; struct stat st1; struct stat st2; int first_arg; int create_only = 0; int args; int c; if (argc < 2) usage(argv); if (strcmp(argv[1], "restore") != 0) usage(argv); while ((c = getopt(argc-1, argv+1, "+hco:i:t:k:")) >= 0) { switch (c) { case 'h': usage(argv); break; case 'c': if (input) die("-c and -i are incompatible"); create_only = 1; /* make output default to partial */ output_file = "trace-partial.dat"; break; case 't': tracing_dir = optarg; break; case 'k': kallsyms = optarg; break; case 'o': if (output) die("only one output file allowed"); output = optarg; break; case 'i': if (input) die("only one input file allowed"); if (create_only) die("-c and -i are incompatible"); input = optarg; break; default: usage(argv); } } if (!output) output = output_file; if ((argc - optind) <= 1) { if (!create_only) { warning("No data files found"); usage(argv); } handle = create_output(output, tracing_dir, kallsyms); if (!handle) die("Unabled to create output file %s", output); if (tracecmd_write_cmdlines(handle) < 0) die("Failed to write command lines"); tracecmd_output_close(handle); exit(0); } first_arg = optind + 1; args = argc - first_arg; printf("first = %d %s args=%d\n", first_arg, argv[first_arg], args); /* Make sure input and output are not the same file */ if (input && output) { if (stat(input, &st1) < 0) die("%s:", input); /* output exists? 
otherwise we don't care */ if (stat(output, &st2) == 0) { if (st1.st_ino == st2.st_ino && st1.st_dev == st2.st_dev) die("input and output file are the same"); } } if (input) { struct tracecmd_input *ihandle; ihandle = tracecmd_alloc(input, 0); if (!ihandle) die("error reading file %s", input); /* make sure headers are ok */ if (tracecmd_read_headers(ihandle, TRACECMD_FILE_CMD_LINES) < 0) die("error reading file %s headers", input); handle = tracecmd_copy(ihandle, output, TRACECMD_FILE_CMD_LINES, 0, NULL); tracecmd_close(ihandle); } else { handle = tracecmd_output_create(output); tracecmd_output_write_headers(handle, NULL); } if (!handle) die("error writing to %s", output); if (tracecmd_append_cpu_data(handle, args, &argv[first_arg]) < 0) die("failed to append data"); return; } trace-cmd-v3.3.1/tracecmd/trace-setup-guest.c000066400000000000000000000111201470231550600210500ustar00rootroot00000000000000// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2019 VMware Inc, Slavomir Kaslev * */ #include #include #include #include #include #include #include #include #include #include "trace-local.h" #include "trace-msg.h" static int make_dir(const char *path, mode_t mode) { char buf[PATH_MAX+2], *p; strncpy(buf, path, sizeof(buf)); if (buf[PATH_MAX]) return -E2BIG; for (p = buf; *p; p++) { p += strspn(p, "/"); p += strcspn(p, "/"); *p = '\0'; if (mkdir(buf, mode) < 0 && errno != EEXIST) return -errno; *p = '/'; } return 0; } static int make_fifo(const char *path, mode_t mode) { struct stat st; if (!stat(path, &st)) { if (S_ISFIFO(st.st_mode)) return 0; return -EEXIST; } if (mkfifo(path, mode)) return -errno; return 0; } static int make_guest_dir(const char *guest) { char path[PATH_MAX]; snprintf(path, sizeof(path), GUEST_DIR_FMT, guest); return make_dir(path, 0750); } static int make_guest_fifo(const char *guest, int cpu, mode_t mode) { static const char *exts[] = {".in", ".out"}; char path[PATH_MAX]; int i, ret = 0; for (i = 0; i < ARRAY_SIZE(exts); i++) { 
snprintf(path, sizeof(path), GUEST_FIFO_FMT "%s", guest, cpu, exts[i]); ret = make_fifo(path, mode); if (ret < 0) break; } return ret; } static int make_guest_fifos(const char *guest, int nr_cpus, mode_t mode) { int i, ret = 0; mode_t mask; mask = umask(0); for (i = 0; i < nr_cpus; i++) { ret = make_guest_fifo(guest, i, mode); if (ret < 0) break; } umask(mask); return ret; } static int get_guest_cpu_count(const char *guest) { const char *cmd_fmt = "virsh vcpucount --maximum '%s' 2>/dev/null"; int nr_cpus = -1; char cmd[1024]; FILE *f; snprintf(cmd, sizeof(cmd), cmd_fmt, guest); f = popen(cmd, "r"); if (!f) return -errno; fscanf(f, "%d", &nr_cpus); pclose(f); return nr_cpus; } static int attach_guest_fifos(const char *guest, int nr_cpus) { const char *cmd_fmt = "virsh attach-device --config '%s' '%s' >/dev/null 2>/dev/null"; const char *xml_fmt = "\n" " \n" " \n" ""; char tmp_path[PATH_MAX], path[PATH_MAX]; char cmd[PATH_MAX + 256], xml[PATH_MAX + 256]; int i, fd, ret = 0; strcpy(tmp_path, "/tmp/pipexmlXXXXXX"); fd = mkstemp(tmp_path); if (fd < 0) return fd; for (i = 0; i < nr_cpus; i++) { snprintf(path, sizeof(path), GUEST_FIFO_FMT, guest, i); snprintf(xml, sizeof(xml), xml_fmt, path, GUEST_PIPE_NAME, i); pwrite(fd, xml, strlen(xml), 0); snprintf(cmd, sizeof(cmd), cmd_fmt, guest, tmp_path); errno = 0; if (system(cmd) != 0) { ret = -errno; break; } } close(fd); unlink(tmp_path); return ret; } static void do_setup_guest(const char *guest, int nr_cpus, mode_t mode, gid_t gid, bool attach) { gid_t save_egid; int ret; if (gid != -1) { save_egid = getegid(); ret = setegid(gid); if (ret < 0) die("failed to set effective group ID"); } ret = make_guest_dir(guest); if (ret < 0) die("failed to create guest directory for %s", guest); ret = make_guest_fifos(guest, nr_cpus, mode); if (ret < 0) die("failed to create FIFOs for %s", guest); if (attach) { ret = attach_guest_fifos(guest, nr_cpus); if (ret < 0) die("failed to attach FIFOs to %s", guest); } if (gid != -1) { ret = 
setegid(save_egid); if (ret < 0) die("failed to restore effective group ID"); } } void trace_setup_guest(int argc, char **argv) { bool attach = false; struct group *group; mode_t mode = 0660; int nr_cpus = -1; gid_t gid = -1; char *guest; if (argc < 2) usage(argv); if (strcmp(argv[1], "setup-guest") != 0) usage(argv); for (;;) { int c, option_index = 0; static struct option long_options[] = { {"help", no_argument, NULL, '?'}, {NULL, 0, NULL, 0} }; c = getopt_long(argc-1, argv+1, "+hc:p:g:a", long_options, &option_index); if (c == -1) break; switch (c) { case 'h': usage(argv); break; case 'c': nr_cpus = atoi(optarg); break; case 'p': mode = strtol(optarg, NULL, 8); break; case 'g': group = getgrnam(optarg); if (!group) die("group %s does not exist", optarg); gid = group->gr_gid; break; case 'a': attach = true; break; default: usage(argv); } } if (optind != argc-2) usage(argv); guest = argv[optind+1]; if (nr_cpus <= 0) nr_cpus = get_guest_cpu_count(guest); if (nr_cpus <= 0) die("invalid number of cpus for guest %s", guest); do_setup_guest(guest, nr_cpus, mode, gid, attach); } trace-cmd-v3.3.1/tracecmd/trace-show.c000066400000000000000000000124231470231550600175520ustar00rootroot00000000000000// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt * */ #include #include #include #include "tracefs.h" #include "trace-local.h" enum { OPT_cpumask = 240, OPT_graph_notrace, OPT_graph_function, OPT_ftrace_pid, OPT_ftrace_notrace, OPT_ftrace_filter, OPT_buffer_subbuf_size_kb, OPT_buffer_total_size_kb, OPT_buffer_size_kb, OPT_buffer_percent, OPT_current_tracer, OPT_tracing_on, OPT_hist, OPT_trigger, }; void trace_show(int argc, char **argv) { const char *buffer = NULL; const char *file = "trace"; const char *cpu = NULL; struct buffer_instance *instance = &top_instance; char *hist = NULL; char *trigger = NULL; char cpu_path[128]; char *path; int snap = 0; int pipe = 0; int show_name = 0; int option_index = 0; int stop = 0; int c; static 
struct option long_options[] = { {"hist", required_argument, NULL, OPT_hist}, {"trigger", required_argument, NULL, OPT_trigger}, {"tracing_on", no_argument, NULL, OPT_tracing_on}, {"current_tracer", no_argument, NULL, OPT_current_tracer}, {"buffer_size", no_argument, NULL, OPT_buffer_size_kb}, {"buffer_total_size", no_argument, NULL, OPT_buffer_total_size_kb}, {"buffer_subbuf_size", no_argument, NULL, OPT_buffer_subbuf_size_kb}, {"buffer_percent", no_argument, NULL, OPT_buffer_percent}, {"ftrace_filter", no_argument, NULL, OPT_ftrace_filter}, {"ftrace_notrace", no_argument, NULL, OPT_ftrace_notrace}, {"ftrace_pid", no_argument, NULL, OPT_ftrace_pid}, {"graph_function", no_argument, NULL, OPT_graph_function}, {"graph_notrace", no_argument, NULL, OPT_graph_notrace}, {"cpumask", no_argument, NULL, OPT_cpumask}, {"help", no_argument, NULL, '?'}, {NULL, 0, NULL, 0} }; init_top_instance(); while ((c = getopt_long(argc-1, argv+1, "B:c:fsp", long_options, &option_index)) >= 0) { switch (c) { case 'h': usage(argv); break; case 'B': if (buffer) die("Can only show one buffer at a time"); buffer = optarg; instance = allocate_instance(optarg); if (!instance) die("Failed to create instance"); break; case 'c': if (cpu) die("Can only show one CPU at a time"); cpu = optarg; break; case 'f': show_name = 1; break; case 's': snap = 1; if (pipe) die("Can not have -s and -p together"); break; case 'p': pipe = 1; if (snap) die("Can not have -s and -p together"); break; case OPT_hist: hist = optarg; break; case OPT_trigger: trigger = optarg; break; case OPT_tracing_on: show_instance_file(instance, "tracing_on"); stop = 1; break; case OPT_current_tracer: show_instance_file(instance, "current_tracer"); stop = 1; break; case OPT_buffer_size_kb: show_instance_file(instance, "buffer_size_kb"); stop = 1; break; case OPT_buffer_total_size_kb: show_instance_file(instance, "buffer_total_size_kb"); stop = 1; break; case OPT_buffer_subbuf_size_kb: show_instance_file(instance, 
"buffer_subbuf_size_kb"); stop = 1; break; case OPT_buffer_percent: show_instance_file(instance, "buffer_percent"); stop = 1; break; case OPT_ftrace_filter: show_instance_file(instance, "set_ftrace_filter"); stop = 1; break; case OPT_ftrace_notrace: show_instance_file(instance, "set_ftrace_notrace"); stop = 1; break; case OPT_ftrace_pid: show_instance_file(instance, "set_ftrace_pid"); stop = 1; break; case OPT_graph_function: show_instance_file(instance, "set_graph_function"); stop = 1; break; case OPT_graph_notrace: show_instance_file(instance, "set_graph_notrace"); stop = 1; break; case OPT_cpumask: show_instance_file(instance, "tracing_cpumask"); stop = 1; break; default: usage(argv); } } if (stop) exit(0); if (pipe) file = "trace_pipe"; else if (snap) file = "snapshot"; if (hist || trigger) { char **systems = NULL; char *system = NULL; char *event = hist ? hist : trigger; char *file = hist ? "hist" : "trigger"; char *p; if ((p = strstr(event, ":"))) { system = event; event = p + 1; *p = '\0'; } if (!system) { systems = tracefs_event_systems(NULL); for (int i = 0; systems && systems[i]; i++) { system = systems[i]; if (tracefs_event_file_exists(instance->tracefs, system, event, file)) break; } if (!system) die("Could not find system of event %s", event); } path = tracefs_event_file_read(instance->tracefs, system, event, file, NULL); tracefs_list_free(systems); if (!path) die("Could not find hist for %s%s%s", system ? system : "", system ? 
":":"", event); printf("%s\n", path); free(path); exit(0); } if (cpu) { char *endptr; long val; errno = 0; val = strtol(cpu, &endptr, 0); if (errno || cpu == endptr) die("Invalid CPU index '%s'", cpu); snprintf(cpu_path, 128, "per_cpu/cpu%ld/%s", val, file); file = cpu_path; } if (buffer) { int ret; ret = asprintf(&path, "instances/%s/%s", buffer, file); if (ret < 0) die("Failed to allocate instance path %s", file); file = path; } if (show_name) { char *name; name = tracefs_get_tracing_file(file); printf("%s\n", name); tracefs_put_tracing_file(name); } show_file(file); if (buffer) free(path); return; } trace-cmd-v3.3.1/tracecmd/trace-snapshot.c000066400000000000000000000041051470231550600204270ustar00rootroot00000000000000// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2013 Red Hat Inc, Steven Rostedt * */ #include #include #include #include #include #include #include #include #include "tracefs.h" #include "trace-local.h" static void write_file(const char *name, char *val) { char *path; int fd; ssize_t n; path = tracefs_get_tracing_file(name); fd = open(path, O_WRONLY); if (fd < 0) die("writing %s", path); n = write(fd, val, strlen(val)); if (n < 0) die("failed to write to %s\n", path); tracefs_put_tracing_file(path); close(fd); } void trace_snapshot (int argc, char **argv) { const char *buffer = NULL; const char *file = "snapshot"; struct stat st; char *name; char cpu_path[128]; int take_snap = 0; int reset_snap = 0; int free_snap = 0; int cpu = -1; int ret; int c; if (argc < 2) usage(argv); if (strcmp(argv[1], "snapshot") != 0) usage(argv); while ((c = getopt(argc-1, argv+1, "srfB:c:")) >= 0) { switch (c) { case 'h': usage(argv); break; case 's': take_snap = 1; if (free_snap) die("can't take snapshot and free it at the same time"); break; case 'f': free_snap = 1; if (take_snap) die("can't take snapshot and free it at the same time"); break; case 'r': reset_snap = 1; break; case 'B': if (buffer) die("Can only do one buffer at a time"); buffer = optarg; 
break; case 'c': if (cpu >= 0) die("Can only do one CPU (or all) at a time"); cpu = atoi(optarg); break; default: usage(argv); } } if (cpu >= 0) { snprintf(cpu_path, 128, "per_cpu/cpu%d/%s", cpu, file); file = cpu_path; } name = tracefs_get_tracing_file(file); ret = stat(name, &st); if (ret < 0) die("Snapshot feature is not supported by this kernel"); tracefs_put_tracing_file(name); if (!reset_snap && !take_snap && !free_snap) { show_file(file); exit(0); } if (reset_snap) write_file(file, "2"); if (free_snap) write_file(file, "0"); if (take_snap) write_file(file, "1"); } trace-cmd-v3.3.1/tracecmd/trace-split.c000066400000000000000000000474311470231550600177340ustar00rootroot00000000000000// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2010 Red Hat Inc, Steven Rostedt * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "list.h" #include "trace-local.h" static unsigned int page_size; static const char *default_input_file = DEFAULT_INPUT_FILE; static const char *default_top_instance_name = "top"; static const char *input_file; enum split_types { SPLIT_NONE, /* The order of these must be reverse of the case statement in the options */ SPLIT_SECONDS, SPLIT_MSECS, SPLIT_USECS, SPLIT_EVENTS, SPLIT_PAGES, SPLIT_NR_TYPES, }; struct cpu_data { unsigned long long ts; unsigned long long offset; unsigned long long missed_events; struct tep_record *record; int cpu; int fd; int index; void *commit; void *page; char *file; }; struct handle_list { struct list_head list; char *name; int index; struct tracecmd_input *handle; /* Identify the top instance in the input trace. */ bool was_top_instance; }; static struct list_head handle_list; /** * get_handle - Obtain a handle that must be closed once finished. 
*/ static struct tracecmd_input *get_handle(struct handle_list *item) { struct tracecmd_input *top_handle, *handle; top_handle = tracecmd_open(input_file, 0); if (!top_handle) die("Error reading %s", input_file); if (item->was_top_instance) { return top_handle; } else { handle = tracecmd_buffer_instance_handle(top_handle, item->index); if (!handle) warning("Could not retrieve handle %s", item->name); tracecmd_close(top_handle); return handle; } } static void add_handle(const char *name, int index, bool was_top_instance) { struct handle_list *item; item = calloc(1, sizeof(*item)); if (!item) die("Failed to allocate handle item"); item->name = strdup(name); if (!item->name) die("Failed to duplicate %s", name); item->index = index; item->was_top_instance = was_top_instance; item->handle = get_handle(item); list_add_tail(&item->list, &handle_list); } static void free_handles(struct list_head *list) { struct handle_list *item, *n; list_for_each_entry_safe(item, n, list, list) { list_del(&item->list); free(item->name); tracecmd_close(item->handle); free(item); } } static struct list_head inst_list; struct inst_list { struct list_head list; char *name; struct handle_list *handle; /* Identify the top instance in the input trace. */ bool was_top_instance; /* Identify the top instance in the output trace. 
/**
 * create_type_len - build the 32-bit word holding a time delta and a length
 * @pevent: tep handle, used to learn the byte order of the trace file
 * @time: time delta value to pack
 * @len: type/length value to pack
 *
 * For big endian files @len is OR-ed in at bit 27 and above; otherwise
 * @time is shifted up by 5 bits and @len occupies the low bits.
 * The packed word is then passed through tep_read_number() as a 4 byte
 * value and that result is returned.
 *
 * The packing is done on unsigned values: shifting a signed int so that
 * bits reach the sign bit is undefined behavior.
 */
static int create_type_len(struct tep_handle *pevent, int time, int len)
{
	unsigned int word = (unsigned int)time;
	unsigned int ulen = (unsigned int)len;

	if (tep_is_file_bigendian(pevent))
		word |= ulen << 27;
	else
		word = (word << 5) | ulen;

	return tep_read_number(pevent, &word, 4);
}
calculation of record len (expect:%d actual:%d)", record->record_size, len + 4); *(unsigned *)ptr = tep_read_number(pevent, &len, 4); ptr += 4; index += 4; } len = (record->size + 3) & ~3; index += len; memcpy(ptr, record->data, len); cpu_data->index += index; cpu_data->ts = record->ts; return 1; } #define MISSING_EVENTS (1UL << 31) #define MISSING_STORED (1UL << 30) #define COMMIT_MASK ((1 << 27) - 1) static void write_page(struct tep_handle *pevent, struct cpu_data *cpu_data, int long_size) { unsigned long long *ptr = NULL; unsigned int flags = 0; if (cpu_data->missed_events) { flags |= MISSING_EVENTS; if (cpu_data->missed_events > 0) { flags |= MISSING_STORED; ptr = cpu_data->page + cpu_data->index; } } if (long_size == 8) { unsigned long long index = cpu_data->index - 16 + flags;; *(unsigned long long *)cpu_data->commit = tep_read_number(pevent, &index, 8); } else { unsigned int index = cpu_data->index - 12 + flags;; *(unsigned int *)cpu_data->commit = tep_read_number(pevent, &index, 4); } if (ptr) *ptr = tep_read_number(pevent, &cpu_data->missed_events, 8); write(cpu_data->fd, cpu_data->page, page_size); } static struct tep_record *read_record(struct tracecmd_input *handle, int percpu, int *cpu) { if (percpu) return tracecmd_read_data(handle, *cpu); return tracecmd_read_next_data(handle, cpu); } static void set_cpu_time(struct tracecmd_input *handle, int percpu, unsigned long long start, int cpu, int cpus) { if (percpu) { tracecmd_set_cpu_to_timestamp(handle, cpu, start); return; } for (cpu = 0; cpu < cpus; cpu++) tracecmd_set_cpu_to_timestamp(handle, cpu, start); return; } static int parse_cpu(struct tracecmd_input *handle, struct cpu_data *cpu_data, unsigned long long start, unsigned long long end, int count_limit, int percpu, int cpu, enum split_types type, bool *end_reached) { struct tep_record *record; struct tep_handle *pevent; void *ptr; int page_size; int long_size = 0; int cpus; int count = 0; int pages = 0; cpus = tracecmd_cpus(handle); long_size = 
tracecmd_long_size(handle); page_size = tracecmd_page_size(handle); pevent = tracecmd_get_tep(handle); /* Force new creation of first page */ if (percpu) { cpu_data[cpu].index = page_size + 1; cpu_data[cpu].page = NULL; } else { for (cpu = 0; cpu < cpus; cpu++) { cpu_data[cpu].index = page_size + 1; cpu_data[cpu].page = NULL; } } /* * Get the cpu pointers up to the start of the * start time stamp. */ record = read_record(handle, percpu, &cpu); if (start) { set_cpu_time(handle, percpu, start, cpu, cpus); while (record && record->ts < start) { tracecmd_free_record(record); record = read_record(handle, percpu, &cpu); } } else if (record) start = record->ts; while (record && (!end || record->ts <= end)) { if ((cpu_data[cpu].index + record->record_size > page_size) || record->missed_events) { if (type == SPLIT_PAGES && ++pages > count_limit) break; if (cpu_data[cpu].page) write_page(pevent, &cpu_data[cpu], long_size); else { cpu_data[cpu].page = malloc(page_size); if (!cpu_data[cpu].page) die("Failed to allocate page"); } cpu_data[cpu].missed_events = record->missed_events; memset(cpu_data[cpu].page, 0, page_size); ptr = cpu_data[cpu].page; *(unsigned long long*)ptr = tep_read_number(pevent, &(record->ts), 8); cpu_data[cpu].ts = record->ts; ptr += 8; cpu_data[cpu].commit = ptr; ptr += long_size; cpu_data[cpu].index = 8 + long_size; } cpu_data[cpu].offset = record->offset; if (write_record(handle, record, &cpu_data[cpu], type)) { tracecmd_free_record(record); record = read_record(handle, percpu, &cpu); /* if we hit the end of the cpu, clear the offset */ if (!record) { if (percpu) cpu_data[cpu].offset = 0; else for (cpu = 0; cpu < cpus; cpu++) cpu_data[cpu].offset = 0; } switch (type) { case SPLIT_NONE: break; case SPLIT_SECONDS: if (record && record->ts > (start + (unsigned long long)count_limit * 1000000000ULL)) { tracecmd_free_record(record); record = NULL; } break; case SPLIT_MSECS: if (record && record->ts > (start + (unsigned long long)count_limit * 1000000ULL)) { 
/**
 * get_temp_file - build the name of a per CPU temporary file
 * @output_file: path of the output file the temporary file belongs to
 * @name: instance name, or NULL for no instance part
 * @cpu: CPU number appended to the name
 *
 * The result is "<dir>/.tmp.<base>.<name>.<cpu>" where <dir> and <base>
 * are the dirname and basename of @output_file; the "<name>." part is
 * left out when @name is NULL.
 *
 * Returns an allocated string that the caller has to free.
 */
static char *get_temp_file(const char *output_file, const char *name, int cpu)
{
	const char *inst = name ? name : "";
	const char *sep = name ? "." : "";
	char *result = NULL;
	char *copy;
	char *fname;
	char *dname;

	/* basename() and dirname() may modify their argument: use a copy */
	copy = strdup(output_file);
	if (!copy)
		die("Failed to duplicate %s", output_file);

	/* Extract basename() first, as dirname() truncates the copy */
	fname = basename(copy);
	dname = dirname(copy);

	if (asprintf(&result, "%s/.tmp.%s.%s%s%d", dname, fname, inst, sep, cpu) < 0)
		die("Failed to allocate file for %s %s %s %d", dname, fname, inst, cpu);

	free(copy);

	return result;
}
*cpu_data; struct tep_record *record; bool all_end_reached = true; char **cpu_list; char *file; int cpus; int cpu; int ret; int fd; ohandle = tracecmd_copy(handle, output_file, TRACECMD_FILE_CMD_LINES, 0, NULL); tracecmd_set_out_clock(ohandle, tracecmd_get_trace_clock(handle)); list_for_each_entry(inst_entry, &inst_list, list) { struct tracecmd_input *curr_handle; bool curr_end_reached = false; curr_handle = inst_entry->handle->handle; cpus = tracecmd_cpus(curr_handle); cpu_data = malloc(sizeof(*cpu_data) * cpus); if (!cpu_data) die("Failed to allocate cpu_data for %d cpus", cpus); for (cpu = 0; cpu < cpus; cpu++) { file = get_temp_file(output_file, inst_entry->name, cpu); touch_file(file); fd = open(file, O_WRONLY | O_CREAT | O_TRUNC | O_LARGEFILE, 0644); cpu_data[cpu].cpu = cpu; cpu_data[cpu].fd = fd; cpu_data[cpu].file = file; cpu_data[cpu].offset = 0; if (start) tracecmd_set_cpu_to_timestamp(curr_handle, cpu, start); } if (only_cpu >= 0) { parse_cpu(curr_handle, cpu_data, start, end, count, 1, only_cpu, type, &curr_end_reached); } else if (percpu) { for (cpu = 0; cpu < cpus; cpu++) parse_cpu(curr_handle, cpu_data, start, end, count, percpu, cpu, type, &curr_end_reached); } else { parse_cpu(curr_handle, cpu_data, start, end, count, percpu, -1, type, &curr_end_reached); } /* End is reached when all instances finished. 
*/ all_end_reached &= curr_end_reached; cpu_list = malloc(sizeof(*cpu_list) * cpus); if (!cpu_list) die("Failed to allocate cpu_list for %d cpus", cpus); for (cpu = 0; cpu < cpus; cpu++) cpu_list[cpu] = cpu_data[cpu].file; if (inst_entry->is_top_instance) ret = tracecmd_append_cpu_data(ohandle, cpus, cpu_list); else ret = tracecmd_append_buffer_cpu_data(ohandle, inst_entry->name, cpus, cpu_list); if (ret < 0) die("Failed to append tracing data\n"); for (cpu = 0; cpu < cpus; cpu++) { /* Set the tracecmd cursor to the next set of records */ if (cpu_data[cpu].offset) { record = tracecmd_read_at(curr_handle, cpu_data[cpu].offset, NULL); if (record && (!current || record->ts > current)) current = record->ts + 1; tracecmd_free_record(record); } } for (cpu = 0; cpu < cpus; cpu++) { close(cpu_data[cpu].fd); delete_temp_file(cpu_data[cpu].file); put_temp_file(cpu_data[cpu].file); } free(cpu_data); free(cpu_list); } tracecmd_output_close(ohandle); *end_reached = all_end_reached; return current; } /* Map the instance names to their handle. */ static void map_inst_handle(void) { struct handle_list *handle_entry; struct inst_list *inst_entry; /* * No specific instance was given for this output file. * Add all the available instances. */ if (list_empty(&inst_list)) { list_for_each_entry(handle_entry, &handle_list, list) { add_inst(handle_entry->name, handle_entry->was_top_instance, handle_entry->was_top_instance); } } list_for_each_entry(inst_entry, &inst_list, list) { list_for_each_entry(handle_entry, &handle_list, list) { if ((inst_entry->was_top_instance && handle_entry->was_top_instance) || (!inst_entry->was_top_instance && !strcmp(handle_entry->name, inst_entry->name))) { inst_entry->handle = handle_entry; goto found; } } warning("Requested instance %s was not found in trace.", inst_entry->name); break; found: continue; } } static bool is_top_instance_unique(void) { struct inst_list *inst_entry; bool has_top_buffer = false; /* Check there is at most one top buffer. 
*/ list_for_each_entry(inst_entry, &inst_list, list) { if (inst_entry->is_top_instance) { if (has_top_buffer) return false; has_top_buffer = true; } } return true; } enum { OPT_top = 237, }; /* * Used to identify the arg. previously parsed. * E.g. '-b' can only follow '--top'. */ enum prev_arg_type { PREV_IS_NONE, PREV_IS_TOP, PREV_IS_BUFFER, }; void trace_split (int argc, char **argv) { struct tracecmd_input *handle; unsigned long long start_ns = 0, end_ns = 0; unsigned long long current; enum prev_arg_type prev_arg_type; struct inst_list *prev_inst = NULL; int prev_arg_idx; bool end_reached = false; double start, end; char *endptr; char *output = NULL; char *output_file; enum split_types split_type = SPLIT_NONE; enum split_types type = SPLIT_NONE; int instances; int count; int repeat = 0; int percpu = 0; int cpu = -1; int ac; int c; static struct option long_options[] = { {"top", optional_argument, NULL, OPT_top}, {NULL, 0, NULL, 0}, }; int option_index = 0; prev_arg_type = PREV_IS_NONE; list_head_init(&handle_list); list_head_init(&inst_list); if (strcmp(argv[1], "split") != 0) usage(argv); while ((c = getopt_long(argc - 1, argv + 1, "+ho:i:s:m:u:e:p:rcC:B:b:t", long_options, &option_index)) >= 0) { switch (c) { case 'h': usage(argv); break; case 'p': type++; case 'e': type++; case 'u': type++; case 'm': type++; case 's': type++; if (split_type != SPLIT_NONE) die("Only one type of split is allowed"); count = atoi(optarg); if (count <= 0) die("Units must be greater than 0"); split_type = type; /* Spliting by pages only makes sense per cpu */ if (type == SPLIT_PAGES) percpu = 1; break; case 'r': repeat = 1; break; case 'c': percpu = 1; break; case 'C': cpu = atoi(optarg); break; case 'o': if (output) die("only one output file allowed"); output = strdup(optarg); break; case 'i': input_file = optarg; break; case OPT_top: prev_arg_type = PREV_IS_TOP; prev_arg_idx = optind; prev_inst = add_inst(default_top_instance_name, true, true); break; case 'b': /* 1 as --top 
takes no argument. */ if (prev_arg_type != PREV_IS_TOP && (prev_arg_idx != optind - 1)) usage(argv); prev_arg_type = PREV_IS_NONE; prev_inst->is_top_instance = false; free(prev_inst->name); prev_inst->name = strdup(optarg); if (!prev_inst->name) die("Failed to duplicate %s", optarg); break; case 'B': prev_arg_type = PREV_IS_BUFFER; prev_arg_idx = optind; prev_inst = add_inst(optarg, false, false); break; case 't': /* 2 as -B takes an argument. */ if (prev_arg_type != PREV_IS_BUFFER && (prev_arg_idx != optind - 2)) usage(argv); prev_arg_type = PREV_IS_NONE; prev_inst->is_top_instance = true; break; default: usage(argv); } } if (!is_top_instance_unique()) die("Can only have one top instance."); ac = (argc - optind); if (ac >= 2) { optind++; start = strtod(argv[optind], &endptr); if (ac > 3) usage(argv); /* Make sure a true start value was entered */ if (*endptr != 0) die("Start value not floating point: %s", argv[optind]); start_ns = (unsigned long long)(start * 1000000000.0); optind++; if (ac == 3) { end = strtod(argv[optind], &endptr); /* Make sure a true end value was entered */ if (*endptr != 0) die("End value not floating point: %s", argv[optind]); end_ns = (unsigned long long)(end * 1000000000.0); if (end_ns < start_ns) die("Error: end is less than start"); } } if (!input_file) input_file = default_input_file; handle = tracecmd_open(input_file, 0); if (!handle) die("error reading %s", input_file); if (tracecmd_get_file_state(handle) == TRACECMD_FILE_CPU_LATENCY) die("trace-cmd split does not work with latency traces\n"); page_size = tracecmd_page_size(handle); if (!output) output = strdup(input_file); if (!repeat && strcmp(output, input_file) == 0) { output = realloc(output, strlen(output) + 3); strcat(output, ".1"); } output_file = malloc(strlen(output) + 50); if (!output_file) die("Failed to allocate for %s", output); c = 1; add_handle(default_top_instance_name, -1, true); instances = tracecmd_buffer_instances(handle); if (instances) { const char *name; int 
i; for (i = 0; i < instances; i++) { name = tracecmd_buffer_instance_name(handle, i); if (!name) die("error in reading buffer instance"); add_handle(name, i, false); } } map_inst_handle(); do { if (repeat) sprintf(output_file, "%s.%04d", output, c++); else strcpy(output_file, output); current = parse_file(handle, output_file, start_ns, end_ns, percpu, cpu, count, type, &end_reached); if (!repeat) break; start_ns = 0; } while (!end_reached && (current && (!end_ns || current < end_ns))); free(output); free(output_file); tracecmd_close(handle); free_handles(&handle_list); free_inst(&inst_list); return; } trace-cmd-v3.3.1/tracecmd/trace-sqlhist.c000066400000000000000000000121331470231550600202570ustar00rootroot00000000000000#include #include #include #include #include #include #include #include "trace-local.h" enum action { ACTION_DEFAULT = 0, ACTION_SNAPSHOT = (1 << 0), ACTION_TRACE = (1 << 1), ACTION_SAVE = (1 << 2), ACTION_MAX = (1 << 3), ACTION_CHANGE = (1 << 4), }; #define ACTIONS ((ACTION_MAX - 1)) static int do_sql(const char *instance_name, const char *buffer, const char *name, const char *var, const char *trace_dir, bool execute, int action, char **save_fields) { struct tracefs_synth *synth; struct tep_handle *tep; struct trace_seq seq; enum tracefs_synth_handler handler; char *err; int ret; if ((action & ACTIONS) && !var) die("Error: -s, -S and -T not supported without -m or -c"); if (!name) name = "Anonymous"; trace_seq_init(&seq); tep = tracefs_local_events(trace_dir); if (!tep) die("Could not read %s", trace_dir ? trace_dir : "tracefs directory"); synth = tracefs_sql(tep, name, buffer, &err); if (!synth) die("Failed creating synthetic event!\n%s", err ? 
err : ""); if (tracefs_synth_complete(synth)) { if (var) { if (action & ACTION_MAX) handler = TRACEFS_SYNTH_HANDLE_MAX; else handler = TRACEFS_SYNTH_HANDLE_CHANGE; /* Default to trace if other actions are not set */ if (!(action & (ACTION_SAVE | ACTION_SNAPSHOT))) action |= ACTION_TRACE; if (action & ACTION_SAVE) { ret = tracefs_synth_save(synth, handler, var, save_fields); if (ret < 0) { err = "adding save"; goto failed_action; } } if (action & ACTION_TRACE) { /* * By doing the trace before snapshot, it will be included * in the snapshot. */ ret = tracefs_synth_trace(synth, handler, var); if (ret < 0) { err = "adding trace"; goto failed_action; } } if (action & ACTION_SNAPSHOT) { ret = tracefs_synth_snapshot(synth, handler, var); if (ret < 0) { err = "adding snapshot"; failed_action: perror(err); if (errno == ENODEV) fprintf(stderr, "ERROR: '%s' is not a variable\n", var); exit(-1); } } } tracefs_synth_echo_cmd(&seq, synth); if (execute) { ret = tracefs_synth_create(synth); if (ret < 0) die("%s\n", tracefs_error_last(NULL)); } } else { struct tracefs_instance *instance = NULL; struct tracefs_hist *hist; hist = tracefs_synth_get_start_hist(synth); if (!hist) die("get_start_hist"); if (instance_name) { if (execute) instance = tracefs_instance_create(instance_name); else instance = tracefs_instance_alloc(trace_dir, instance_name); if (!instance) die("Failed to create instance"); } tracefs_hist_echo_cmd(&seq, instance, hist, 0); if (execute) { ret = tracefs_hist_start(instance, hist); if (ret < 0) die("%s\n", tracefs_error_last(instance)); } } tracefs_synth_free(synth); trace_seq_do_printf(&seq); trace_seq_destroy(&seq); return 0; } void trace_sqlhist (int argc, char **argv) { char *trace_dir = NULL; char *buffer = NULL; char buf[BUFSIZ]; int buffer_size = 0; const char *file = NULL; const char *instance = NULL; bool execute = false; char **save_fields = NULL; const char *name; const char *var; char **save_argv; int action = 0; char *tok; FILE *fp; size_t r; int c; 
int i; /* Remove 'trace-cmd' */ save_argv = argv; argc -= 1; argv += 1; if (argc < 2) usage(save_argv); for (;;) { c = getopt(argc, argv, "ht:f:en:m:c:sS:TB:"); if (c == -1) break; switch(c) { case 'h': usage(save_argv); case 't': trace_dir = optarg; break; case 'f': file = optarg; break; case 'e': execute = true; break; case 'm': action |= ACTION_MAX; var = optarg; break; case 'c': action |= ACTION_CHANGE; var = optarg; break; case 's': action |= ACTION_SNAPSHOT; break; case 'S': action |= ACTION_SAVE; tok = strtok(optarg, ","); while (tok) { save_fields = tracefs_list_add(save_fields, tok); tok = strtok(NULL, ","); } if (!save_fields) { perror(optarg); exit(-1); } break; case 'T': action |= ACTION_TRACE | ACTION_SNAPSHOT; break; case 'B': instance = optarg; break; case 'n': name = optarg; break; } } if ((action & (ACTION_MAX|ACTION_CHANGE)) == (ACTION_MAX|ACTION_CHANGE)) { fprintf(stderr, "Can not use both -m and -c together\n"); exit(-1); } if (file) { if (!strcmp(file, "-")) fp = stdin; else fp = fopen(file, "r"); if (!fp) { perror(file); exit(-1); } while ((r = fread(buf, 1, BUFSIZ, fp)) > 0) { buffer = realloc(buffer, buffer_size + r + 1); strncpy(buffer + buffer_size, buf, r); buffer_size += r; } fclose(fp); if (buffer_size) buffer[buffer_size] = '\0'; } else if (argc == optind) { usage(save_argv); } else { for (i = optind; i < argc; i++) { r = strlen(argv[i]); buffer = realloc(buffer, buffer_size + r + 2); if (i != optind) buffer[buffer_size++] = ' '; strcpy(buffer + buffer_size, argv[i]); buffer_size += r; } } do_sql(instance, buffer, name, var, trace_dir, execute, action, save_fields); free(buffer); } trace-cmd-v3.3.1/tracecmd/trace-stack.c000066400000000000000000000073661470231550600177110ustar00rootroot00000000000000// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt * */ #include #include #include #include #include #include #include #include #include #include #include #include "tracefs.h" #include 
"trace-local.h" #define PROC_FILE "/proc/sys/kernel/stack_tracer_enabled" enum stack_type { STACK_START, STACK_STOP, STACK_RESET, STACK_REPORT }; static void test_available(void) { struct stat buf; int fd; fd = stat(PROC_FILE, &buf); if (fd < 0) die("stack tracer not configured on running kernel"); } /* NOTE: this implementation only accepts new_status in the range [0..9]. */ static void change_stack_tracer_status(unsigned new_status) { char buf[1]; int status; int ret; int fd; int n; if (new_status > 9) { warning("invalid status %d\n", new_status); return; } ret = tracecmd_stack_tracer_status(&status); if (ret < 0) die("error reading %s", PROC_FILE); if (ret > 0 && status == new_status) return; /* nothing to do */ fd = open(PROC_FILE, O_WRONLY); if (fd < 0) die("writing %s", PROC_FILE); buf[0] = new_status + '0'; n = write(fd, buf, 1); if (n < 0) die("writing into %s", PROC_FILE); close(fd); } static void start_trace(void) { change_stack_tracer_status(1); } static void stop_trace(void) { change_stack_tracer_status(0); } static void reset_trace(void) { char *path; char buf[1]; int fd; int n; path = tracefs_get_tracing_file("stack_max_size"); fd = open(path, O_WRONLY); if (fd < 0) die("writing %s", path); buf[0] = '0'; n = write(fd, buf, 1); if (n < 0) die("writing into %s", path); tracefs_put_tracing_file(path); close(fd); } static void read_trace(void) { char *buf = NULL; int status; char *path; FILE *fp; size_t n; int r; if (tracecmd_stack_tracer_status(&status) <= 0) die("Invalid stack tracer state"); if (status > 0) printf("(stack tracer running)\n"); else printf("(stack tracer not running)\n"); path = tracefs_get_tracing_file("stack_trace"); fp = fopen(path, "r"); if (!fp) die("reading to '%s'", path); tracefs_put_tracing_file(path); while ((r = getline(&buf, &n, fp)) >= 0) { /* * Skip any line that starts with a '#'. * Those talk about how to enable stack tracing * within the debugfs system. We don't care about that. 
/**
 * strstrip - remove leading and trailing white space from a string
 * @str: the string to strip (modified in place), may be NULL
 *
 * Trailing white space is cut off by writing a nul byte into @str.
 * Leading white space is skipped by returning a pointer past it.
 *
 * Returns a pointer into @str, or NULL if @str is NULL. The returned
 * pointer must not be passed to free(); free the original @str instead.
 */
char *strstrip(char *str)
{
	char *end;

	if (!str)
		return NULL;

	/*
	 * Walk back from the end without ever stepping before the start of
	 * the string (an empty or all blank string used to compute str - 1).
	 * The cast is needed as isspace() is undefined for negative values.
	 */
	end = str + strlen(str);
	while (end > str && isspace((unsigned char)end[-1]))
		end--;
	*end = '\0';

	/* isspace() is false for the nul byte, so this stops at the end */
	while (isspace((unsigned char)*str))
		str++;

	return str;
}
/**
 * get_fd_content - read everything from a file descriptor into a string
 * @fd: file descriptor to read from
 * @file: name used in the error message when reading fails
 *
 * The buffer grows in multiples of BUFSIZ (plus one byte for the nul)
 * until read() returns 0.
 *
 * Returns an allocated, nul terminated string the caller has to free.
 * Allocation and read failures end in die().
 */
static char *get_fd_content(int fd, const char *file)
{
	char *content = NULL;
	size_t used = 0;
	size_t room;
	int nread;

	do {
		/* Next multiple of BUFSIZ strictly above what is used */
		room = (used / BUFSIZ + 1) * BUFSIZ;

		content = realloc(content, room + 1);
		if (!content)
			die("malloc");

		nread = read(fd, content + used, room - used);
		if (nread < 0)
			die("reading %s\n", file);

		used += nread;
	} while (nread);

	content[used] = 0;

	return content;
}
iter->event_dir = opendir(event); if (!iter->event_dir) die("opendir %s", event); free(event); } if (iter->event_dir) { while ((dent = readdir(iter->event_dir))) { const char *name = dent->d_name; if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0) continue; iter->event_dent = dent; return EVENT_ITER_EVENT; } closedir(iter->event_dir); iter->event_dir = NULL; } do_system: while ((dent = readdir(iter->system_dir))) { const char *name = dent->d_name; if (strcmp(name, ".") == 0 || strcmp(name, "..") == 0) continue; iter->system_dent = dent; return EVENT_ITER_SYSTEM; } return EVENT_ITER_NONE; } void trace_event_iter_free(struct event_iter *iter) { if (!iter) return; if (iter->event_dir) closedir(iter->event_dir); closedir(iter->system_dir); free(iter); } static void reset_event_iter(struct event_iter *iter) { if (iter->event_dir) { closedir(iter->event_dir); iter->event_dir = NULL; } rewinddir(iter->system_dir); } static int process_individual_events(const char *path, struct event_iter *iter) { struct stat st; const char *system = iter->system_dent->d_name; char *file; char *enable = NULL; char *str; int ret = 0; file = append_file(path, system); stat(file, &st); if (!S_ISDIR(st.st_mode)) goto out; enable = append_file(file, "enable"); str = get_file_content(enable); if (!str) goto out; if (*str != '1' && *str != '0') ret = 1; free(str); out: free(enable); free(file); return ret; } static void process_event_enable(char *path, const char *system, const char *name, enum event_process *processed) { struct stat st; char *enable = NULL; char *file; char *str; if (system) path = append_file(path, system); file = append_file(path, name); if (system) free(path); stat(file, &st); if (!S_ISDIR(st.st_mode)) goto out; enable = append_file(file, "enable"); str = get_file_content(enable); if (!str) goto out; if (*str == '1') { if (!system) { if (!*processed) printf(" Individual systems:\n"); printf( " %s\n", name); *processed = PROCESSED_SYSTEM; } else { if (!*processed) { 
printf(" Individual events:\n"); *processed = PROCESSED_SYSTEM; } if (*processed == PROCESSED_SYSTEM) { printf(" %s\n", system); *processed = PROCESSED_EVENT; } printf( " %s\n", name); } } free(str); out: free(enable); free(file); } static void report_events(struct buffer_instance *instance) { struct event_iter *iter; char *str; char *cont; char *path; char *system; enum event_iter_type type; enum event_process processed = PROCESSED_NONE; enum event_process processed_part = PROCESSED_NONE; str = tracefs_instance_file_read(instance->tracefs, "events/enable", NULL); if (!str) return; cont = strstrip(str); printf("\nEvents:\n"); switch(*cont) { case '1': printf(" All enabled\n"); free(str); return; case '0': printf(" All disabled\n"); free(str); return; } free(str); path = tracefs_instance_get_file(instance->tracefs, "events"); if (!path) die("malloc"); iter = trace_event_iter_alloc(path); while (trace_event_iter_next(iter, path, NULL)) { process_event_enable(path, NULL, iter->system_dent->d_name, &processed); } reset_event_iter(iter); system = NULL; while ((type = trace_event_iter_next(iter, path, system))) { if (type == EVENT_ITER_SYSTEM) { /* Only process systems that are not fully enabled */ if (!process_individual_events(path, iter)) continue; system = iter->system_dent->d_name; if (processed_part) processed_part = PROCESSED_SYSTEM; continue; } process_event_enable(path, iter->system_dent->d_name, iter->event_dent->d_name, &processed_part); } trace_event_iter_free(iter); if (!processed && !processed_part) printf(" (none enabled)\n"); tracefs_put_tracing_file(path); } static void process_event_filter(char *path, struct event_iter *iter, enum event_process *processed) { const char *system = iter->system_dent->d_name; const char *event = iter->event_dent->d_name; struct stat st; char *filter = NULL; char *file; char *str; char *cont; path = append_file(path, system); file = append_file(path, event); free(path); stat(file, &st); if (!S_ISDIR(st.st_mode)) goto out; 
filter = append_file(file, "filter"); str = get_file_content(filter); if (!str) goto out; cont = strstrip(str); if (strcmp(cont, "none") == 0) { free(str); goto out; } if (!*processed) printf("\nFilters:\n"); printf( " %s:%s \"%s\"\n", system, event, cont); *processed = PROCESSED_SYSTEM; free(str); out: free(filter); free(file); } static void report_event_filters(struct buffer_instance *instance) { struct event_iter *iter; char *path; char *system; enum event_iter_type type; enum event_process processed = PROCESSED_NONE; path = tracefs_instance_get_file(instance->tracefs, "events"); if (!path) die("malloc"); iter = trace_event_iter_alloc(path); processed = PROCESSED_NONE; system = NULL; while ((type = trace_event_iter_next(iter, path, system))) { if (type == EVENT_ITER_SYSTEM) { system = iter->system_dent->d_name; continue; } process_event_filter(path, iter, &processed); } trace_event_iter_free(iter); tracefs_put_tracing_file(path); } static void process_event_trigger(char *path, struct event_iter *iter, enum event_process *processed) { const char *system = iter->system_dent->d_name; const char *event = iter->event_dent->d_name; struct stat st; char *trigger = NULL; char *file; char *str; char *cont; path = append_file(path, system); file = append_file(path, event); free(path); stat(file, &st); if (!S_ISDIR(st.st_mode)) goto out; trigger = append_file(file, "trigger"); str = get_file_content(trigger); if (!str) goto out; cont = strstrip(str); if (cont[0] == '#') { free(str); goto out; } if (!*processed) printf("\nTriggers:\n"); printf( " %s:%s \"%s\"\n", system, event, cont); *processed = PROCESSED_SYSTEM; free(str); out: free(trigger); free(file); } static void report_event_triggers(struct buffer_instance *instance) { struct event_iter *iter; char *path; char *system; enum event_iter_type type; enum event_process processed = PROCESSED_NONE; path = tracefs_instance_get_file(instance->tracefs, "events"); if (!path) die("malloc"); iter = trace_event_iter_alloc(path); 
processed = PROCESSED_NONE; system = NULL; while ((type = trace_event_iter_next(iter, path, system))) { if (type == EVENT_ITER_SYSTEM) { system = iter->system_dent->d_name; continue; } process_event_trigger(path, iter, &processed); } trace_event_iter_free(iter); tracefs_put_tracing_file(path); } enum func_states { FUNC_STATE_START, FUNC_STATE_SKIP, FUNC_STATE_PRINT, }; static void list_functions(const char *path, char *string) { enum func_states state; struct stat st; char *str; int ret = 0; int len; int i; int first = 0; /* Ignore if it does not exist. */ ret = stat(path, &st); if (ret < 0) return; str = get_file_content(path); if (!str) return; len = strlen(str); state = FUNC_STATE_START; /* Skip all lines that start with '#' */ for (i = 0; i < len; i++) { if (state == FUNC_STATE_PRINT) putchar(str[i]); if (str[i] == '\n') { state = FUNC_STATE_START; continue; } if (state == FUNC_STATE_SKIP) continue; if (state == FUNC_STATE_START && str[i] == '#') { state = FUNC_STATE_SKIP; continue; } if (!first) { printf("\n%s:\n", string); first = 1; } if (state != FUNC_STATE_PRINT) { state = FUNC_STATE_PRINT; printf(" "); putchar(str[i]); } } free(str); } static void report_graph_funcs(struct buffer_instance *instance) { char *path; path = tracefs_instance_get_file(instance->tracefs, "set_graph_function"); if (!path) die("malloc"); list_functions(path, "Function Graph Filter"); tracefs_put_tracing_file(path); path = tracefs_instance_get_file(instance->tracefs, "set_graph_notrace"); if (!path) die("malloc"); list_functions(path, "Function Graph No Trace"); tracefs_put_tracing_file(path); } static void report_ftrace_filters(struct buffer_instance *instance) { char *path; path = tracefs_instance_get_file(instance->tracefs, "set_ftrace_filter"); if (!path) die("malloc"); list_functions(path, "Function Filter"); tracefs_put_tracing_file(path); path = tracefs_instance_get_file(instance->tracefs, "set_ftrace_notrace"); if (!path) die("malloc"); list_functions(path, "Function No 
Trace"); tracefs_put_tracing_file(path); } static void report_buffers(struct buffer_instance *instance) { #define FILE_SIZE 100 char *str; char *cont; char file[FILE_SIZE]; int pagesize; int bufsize; int cpu; str = tracefs_instance_file_read(instance->tracefs, "buffer_size_kb", NULL); if (!str) return; cont = strstrip(str); /* If it's not expanded yet, just skip */ if (strstr(cont, "expanded") != NULL) goto out; if (strcmp(cont, "X") != 0) { printf("\nBuffer size in kilobytes (per cpu):\n"); printf(" %s\n", str); goto total; } /* Read the sizes of each CPU buffer */ for (cpu = 0; ; cpu++) { snprintf(file, FILE_SIZE, "per_cpu/cpu%d/buffer_size_kb", cpu); str = tracefs_instance_file_read(instance->tracefs, file, NULL); if (!str) break; cont = strstrip(str); if (!cpu) putchar('\n'); printf("CPU %d buffer size (kb): %s\n", cpu, cont); free(str); } total: free(str); str = tracefs_instance_file_read(instance->tracefs, "buffer_total_size_kb", NULL); if (!str) return; cont = strstrip(str); printf("\nBuffer total size in kilobytes:\n"); printf(" %s\n", str); pagesize = getpagesize(); bufsize = tracefs_instance_get_subbuf_size(instance->tracefs); if (bufsize > 0 && bufsize * 1024 != pagesize) printf("\nSub-buffer size in kilobytes:\n %d\n", bufsize); out: free(str); } static void report_clock(struct buffer_instance *instance) { struct tracefs_instance *tracefs = instance ? 
instance->tracefs : NULL; char *clock; clock = tracefs_get_clock(tracefs); /* Default clock is "local", only show others */ if (clock && strcmp(clock, "local") != 0) printf("\nClock: %s\n", clock); free(clock); } static void report_cpumask(struct buffer_instance *instance) { char *str; char *cont; int cpus; int n; int i; str = tracefs_instance_file_read(instance->tracefs, "tracing_cpumask", NULL); if (!str) return; cont = strstrip(str); /* check to make sure all CPUs on this machine are set */ cpus = tracecmd_count_cpus(); for (i = strlen(cont) - 1; i >= 0 && cpus > 0; i--) { if (cont[i] == ',') continue; if (cont[i] == 'f') { cpus -= 4; continue; } if (cpus >= 4) break; if (cont[i] >= '0' && cont[i] <= '9') n = cont[i] - '0'; else n = 10 + (cont[i] - 'a'); while (cpus > 0) { if (!(n & 1)) break; n >>= 1; cpus--; } break; } /* If cpus is greater than zero, one isn't set */ if (cpus > 0) printf("\nCPU mask: %s\n", cont); free(str); } static void report_probes(struct buffer_instance *instance, const char *file, const char *string) { char *str; char *cont; int newline; int i; str = tracefs_instance_file_read(instance->tracefs, file, NULL); if (!str) return; cont = strstrip(str); if (strlen(cont) == 0) goto out; printf("\n%s:\n", string); newline = 1; for (i = 0; cont[i]; i++) { if (newline) printf(" "); putchar(cont[i]); if (cont[i] == '\n') newline = 1; else newline = 0; } putchar('\n'); out: free(str); } static void report_kprobes(struct buffer_instance *instance) { report_probes(instance, "kprobe_events", "Kprobe events"); } static void report_uprobes(struct buffer_instance *instance) { report_probes(instance, "uprobe_events", "Uprobe events"); } static void report_synthetic(struct buffer_instance *instance) { report_probes(instance, "synthetic_events", "Synthetic events"); } static void report_traceon(struct buffer_instance *instance) { char *str; char *cont; str = tracefs_instance_file_read(instance->tracefs, "tracing_on", NULL); if (!str) return; cont = 
strstrip(str); /* double newline as this is the last thing printed */ if (strcmp(cont, "0") == 0) printf("\nTracing is disabled\n\n"); else printf("\nTracing is enabled\n\n"); free(str); } static void stat_instance(struct buffer_instance *instance, bool opt) { if (instance != &top_instance) { if (instance != first_instance) printf("---------------\n"); printf("Instance: %s\n", tracefs_instance_get_name(instance->tracefs)); } report_file(instance, "current_tracer", "nop", "Tracer: "); report_events(instance); report_event_filters(instance); report_event_triggers(instance); report_ftrace_filters(instance); report_graph_funcs(instance); report_buffers(instance); report_clock(instance); report_cpumask(instance); report_file(instance, "tracing_max_latency", "0", "Max Latency: "); report_kprobes(instance); report_uprobes(instance); report_synthetic(instance); report_file(instance, "set_event_pid", "", "Filtered event PIDs:\n"); report_file(instance, "set_event_notrace_pid", "", "Filtered notrace event PIDs:\n"); report_file(instance, "set_ftrace_pid", "no pid", "Filtered function tracer PIDs:\n"); report_file(instance, "set_ftrace_notrace_pid", "no pid", "Filtered function tracer notrace PIDs:\n"); if (opt) { printf("\nOptions:\n"); show_options(" ", instance); } report_traceon(instance); report_file(instance, "error_log", "", "Error log:\n"); if (instance == &top_instance) report_instances(); } void trace_stat (int argc, char **argv) { struct buffer_instance *instance = &top_instance; bool opt = false; int topt = 0; int status; int c; init_top_instance(); for (;;) { c = getopt(argc-1, argv+1, "htoB:"); if (c == -1) break; switch (c) { case 'h': usage(argv); break; case 'B': instance = allocate_instance(optarg); if (!instance) die("Failed to create instance"); add_instance(instance, tracecmd_count_cpus()); /* top instance requires direct access */ if (!topt && is_top_instance(first_instance)) first_instance = instance; break; case 't': /* Force to use top instance */ 
topt = 1; instance = &top_instance; break; case 'o': opt = 1; break; default: usage(argv); } } update_first_instance(instance, topt); for_all_instances(instance) { stat_instance(instance, opt); } if (tracecmd_stack_tracer_status(&status) >= 0) { if (status > 0) printf("Stack tracing is enabled\n\n"); } else { printf("Error reading stack tracer status\n\n"); } exit(0); } trace-cmd-v3.3.1/tracecmd/trace-stream.c000066400000000000000000000057641470231550600200770ustar00rootroot00000000000000// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2014 Red Hat Inc, Steven Rostedt * */ #include #include #include #include #include #include #include #include "trace-local.h" /* * Stream runs for a single machine. We are going to cheat * and use the trace-output and trace-input code to create * our pevent. First just create a trace.dat file and then read * it to create the pevent and handle. */ struct tracecmd_input * trace_stream_init(struct buffer_instance *instance, int cpu, int fd, int cpus, struct hook_list *hooks, tracecmd_handle_init_func handle_init, int global) { struct tracecmd_output *trace_output; struct tracecmd_input *trace_input; static FILE *fp = NULL; static int tfd; long flags; if (instance->handle) { trace_input = instance->handle; goto make_pipe; } if (!fp) { fp = tmpfile(); if (!fp) return NULL; tfd = fileno(fp); trace_output = tracecmd_output_create_fd(tfd); if (!trace_output) goto fail; tracecmd_output_write_headers(trace_output, NULL); tracecmd_output_flush(trace_output); /* Don't close the descriptor, use it for reading */ tracecmd_output_free(trace_output); } lseek(tfd, 0, SEEK_SET); trace_input = tracecmd_alloc_fd(tfd, 0); if (!trace_input) goto fail; if (tracecmd_read_headers(trace_input, TRACECMD_FILE_PRINTK) < 0) goto fail_free_input; if (handle_init) handle_init(trace_input, hooks, global); make_pipe: /* Do not block on this pipe */ flags = fcntl(fd, F_GETFL); fcntl(fd, F_SETFL, flags | O_NONBLOCK); if (tracecmd_make_pipe(trace_input, cpu, fd, 
cpus) < 0) goto fail_free_input; instance->handle = trace_input; return trace_input; fail_free_input: tracecmd_close(trace_input); fail: fclose(fp); fp = NULL; /* Try again later? */ return NULL; } int trace_stream_read(struct pid_record_data *pids, int nr_pids, long sleep_us) { struct pid_record_data *last_pid; struct pid_record_data *pid; struct tep_record *record; struct pollfd pollfd[nr_pids]; long sleep_ms = sleep_us > 0 ? (sleep_us + 999) / 1000 : sleep_us; int ret; int i; if (!nr_pids) return 0; last_pid = NULL; again: for (i = 0; i < nr_pids; i++) { pid = &pids[i]; if (!pid->record) pid->record = tracecmd_read_data(pid->instance->handle, pid->cpu); record = pid->record; if (!record && errno == EINVAL) /* pipe has closed */ pid->closed = 1; if (record && (!last_pid || record->ts < last_pid->record->ts)) last_pid = pid; } if (last_pid) { trace_show_data(last_pid->instance->handle, last_pid->record); tracecmd_free_record(last_pid->record); last_pid->record = NULL; return 1; } for (i = 0; i < nr_pids; i++) { /* Do not process closed pipes */ if (pids[i].closed) { memset(pollfd + i, 0, sizeof(*pollfd)); continue; } pollfd[i].fd = pids[i].brass[0]; pollfd[i].events = POLLIN; } ret = poll(pollfd, nr_pids, sleep_ms); if (ret > 0) goto again; return ret; } trace-cmd-v3.3.1/tracecmd/trace-tsync.c000066400000000000000000000170541470231550600177370ustar00rootroot00000000000000// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2021 Google Inc, Steven Rostedt * Copyright (C) 2020, VMware, Tzvetomir Stoyanov * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ #include #include #include "trace-local.h" #include "trace-msg.h" struct trace_mapping { struct tep_event *kvm_entry; struct tep_format_field *vcpu_id; struct tep_format_field *common_pid; int *pids; int *map; int *vcpu; int max_cpus; }; static int cmp_tmap_vcpu(const void *A, const void *B) { const int *a = A; const int *b = B; if (*a < *b) return -1; return *a > *b; } static int 
map_kvm_vcpus(int guest_pid, struct trace_mapping *tmap) { struct dirent *entry; const char *debugfs; char *vm_dir_str = NULL; char *pid_file = NULL; char *kvm_dir; int pid_file_len; bool found = false; DIR *dir; int ret = -1; int i; tmap->vcpu = malloc(sizeof(*tmap->vcpu) * tmap->max_cpus); if (!tmap->vcpu) return -1; memset(tmap->vcpu, -1, sizeof(*tmap->vcpu) * tmap->max_cpus); debugfs = tracefs_debug_dir(); if (!debugfs) return -1; if (asprintf(&kvm_dir, "%s/kvm", debugfs) < 0) return -1; dir = opendir(kvm_dir); if (!dir) goto out; if (asprintf(&pid_file, "%d-", guest_pid) <= 0) goto out; pid_file_len = strlen(pid_file); while ((entry = readdir(dir))) { if (entry->d_type != DT_DIR || strncmp(entry->d_name, pid_file, pid_file_len) != 0) continue; if (asprintf(&vm_dir_str, "%s/%s", kvm_dir, entry->d_name) < 0) goto out; found = true; break; } if (!found) goto out; closedir(dir); dir = opendir(vm_dir_str); if (!dir) goto out; i = 0; while ((entry = readdir(dir))) { if (entry->d_type != DT_DIR || strncmp(entry->d_name, "vcpu", 4)) continue; if (i == tmap->max_cpus) goto out; tmap->vcpu[i] = strtol(entry->d_name + 4, NULL, 10); i++; } if (i < tmap->max_cpus) goto out; qsort(tmap->vcpu, tmap->max_cpus, sizeof(*tmap->vcpu), cmp_tmap_vcpu); ret = 0; out: if (dir) closedir(dir); free(vm_dir_str); free(pid_file); free(kvm_dir); return ret; } static int map_vcpus(struct tep_event *event, struct tep_record *record, int cpu, void *context) { struct trace_mapping *tmap = context; unsigned long long val; int *vcpu; int type; int pid; int ret; int i; /* Do we have junk in the buffer? */ type = tep_data_type(event->tep, record); if (type != tmap->kvm_entry->id) return 0; ret = tep_read_number_field(tmap->common_pid, record->data, &val); if (ret < 0) return 0; pid = (int)val; for (i = 0; tmap->pids[i] >= 0; i++) { if (pid == tmap->pids[i]) break; } /* Is this thread one we care about ? 
*/ if (tmap->pids[i] < 0) return 0; ret = tep_read_number_field(tmap->vcpu_id, record->data, &val); if (ret < 0) return 0; cpu = (int)val; vcpu = bsearch(&cpu, tmap->vcpu, tmap->max_cpus, sizeof(cpu), cmp_tmap_vcpu); /* Sanity check, warn? */ if (!vcpu) return 0; cpu = vcpu - tmap->vcpu; /* Already have this one? Should we check if it is the same? */ if (tmap->map[cpu] >= 0) return 0; tmap->map[cpu] = pid; /* Did we get them all */ for (i = 0; i < tmap->max_cpus; i++) { if (tmap->map[i] < 0) break; } return i == tmap->max_cpus; } static void start_mapping_vcpus(struct trace_guest *guest) { char *pids = NULL; char *t; int len = 0; int s; int i; if (!guest->task_pids) return; guest->instance = tracefs_instance_create("map_guest_pids"); if (!guest->instance) return; for (i = 0; guest->task_pids[i] >= 0; i++) { s = snprintf(NULL, 0, "%d ", guest->task_pids[i]); t = realloc(pids, len + s + 1); if (!t) { free(pids); pids = NULL; break; } pids = t; sprintf(pids + len, "%d ", guest->task_pids[i]); len += s; } if (pids) { tracefs_instance_file_write(guest->instance, "set_event_pid", pids); free(pids); } tracefs_instance_file_write(guest->instance, "events/kvm/kvm_entry/enable", "1"); } static void stop_mapping_vcpus(int cpu_count, struct trace_guest *guest) { struct trace_mapping tmap = { }; struct tep_handle *tep; const char *systems[] = { "kvm", NULL }; int i; if (!guest->instance) return; tmap.pids = guest->task_pids; tmap.max_cpus = cpu_count; tmap.map = malloc(sizeof(*tmap.map) * tmap.max_cpus); if (!tmap.map) return; /* Check if the kvm vcpu mappings are the same */ if (map_kvm_vcpus(guest->pid, &tmap) < 0) goto out; for (i = 0; i < tmap.max_cpus; i++) tmap.map[i] = -1; tracefs_instance_file_write(guest->instance, "events/kvm/kvm_entry/enable", "0"); tep = tracefs_local_events_system(NULL, systems); if (!tep) goto out; tmap.kvm_entry = tep_find_event_by_name(tep, "kvm", "kvm_entry"); if (!tmap.kvm_entry) goto out_free; tmap.vcpu_id = tep_find_field(tmap.kvm_entry, 
"vcpu_id"); if (!tmap.vcpu_id) goto out_free; tmap.common_pid = tep_find_any_field(tmap.kvm_entry, "common_pid"); if (!tmap.common_pid) goto out_free; tracefs_iterate_raw_events(tep, guest->instance, NULL, 0, map_vcpus, &tmap); for (i = 0; i < tmap.max_cpus; i++) { if (tmap.map[i] < 0) break; } /* We found all the mapped CPUs */ if (i == tmap.max_cpus) { guest->cpu_pid = tmap.map; guest->cpu_max = tmap.max_cpus; tmap.map = NULL; } out_free: tep_free(tep); out: free(tmap.map); tracefs_instance_destroy(guest->instance); tracefs_instance_free(guest->instance); } /** * trace_tsync_as_host - tsync from the host side * @fd: The descriptor to the peer for tsync * @trace_id: The trace_id of the host * @loop_interval: The loop interval for tsyncs that do periodic syncs * @guest_id: The id for guests (negative if this is over network) * @guest_cpus: The number of CPUs the guest has * @proto_name: The protocol name to sync with * @clock: The clock name to use for tracing * * Start the time synchronization from the host side. * This will start the mapping of the virtual CPUs to host threads * if it is a vsocket connection (not a network). * * Returns a pointer to the tsync descriptor on success or NULL on error. 
*/ struct tracecmd_time_sync * trace_tsync_as_host(int fd, unsigned long long trace_id, int loop_interval, int guest_id, int guest_cpus, const char *proto_name, const char *clock) { struct tracecmd_time_sync *tsync; struct trace_guest *guest; int guest_pid = -1; if (fd < 0) return NULL; if (guest_id >= 0) { guest = trace_get_guest(guest_id, NULL); if (guest == NULL) return NULL; guest_pid = guest->pid; start_mapping_vcpus(guest); } tsync = tracecmd_tsync_with_guest(trace_id, loop_interval, fd, guest_pid, guest_cpus, proto_name, clock); if (guest_id >= 0) stop_mapping_vcpus(guest_cpus, guest); return tsync; } /** * trace_tsync_a_guest - tsync from the guest side * @fd: The file descriptor to the peer for tsync * @tsync_proto: The protocol name to sync with * @clock: The clock name to use for tracing * @remote_id: The id to differentiate the remote server with * @loca_id: The id to differentiate the local machine with * * Start the time synchronization from the guest side. * * Returns a pointer to the tsync descriptor on success or NULL on error. 
struct tracecmd_time_sync *
trace_tsync_as_guest(int fd, const char *tsync_proto, const char *clock,
		     unsigned int remote_id, unsigned int local_id)
{
	struct tracecmd_time_sync *tsync;

	/* Without a valid descriptor there is no peer to talk to */
	if (fd < 0)
		return NULL;

	tsync = tracecmd_tsync_with_host(fd, tsync_proto,
					 clock, remote_id, local_id);
	if (tsync)
		return tsync;

	warning("Failed to negotiate timestamps synchronization with the host");
	return NULL;
}
1000]\n" " -S used with --profile, to enable only events in command line\n" " -N host:port to connect to (see listen)\n" " -V cid:port to connect to via vsocket (see listen)\n" " -t used with -N, forces use of tcp in live trace\n" " -b change kernel buffersize (in kilobytes per CPU)\n" " -B create sub buffer and following events will be enabled here\n" " -k do not reset the buffers after tracing.\n" " -i do not fail if an event is not found\n" " -q print no output to the screen\n" " -G when profiling, set soft and hard irqs as global\n" " --quiet print no output to the screen\n" " --temp specify a directory to store the temp files used to create trace.dat\n" " --subbuf-size to specify the sub-buffer size in kilobytes\n" " --module filter module name\n" " --by-comm used with --profile, merge events for related comms\n" " --profile enable tracing options needed for report --profile\n" " --func-stack perform a stack trace for function tracer\n" " (use with caution)\n" " --max-graph-depth limit function_graph depth\n" " --cmdlines-size change kernel saved_cmdlines_size\n" " --no-filter include trace-cmd threads in the trace\n" " --proc-map save the traced processes address map into the trace.dat file\n" " --user execute the specified [command ...] 
as given user\n" " --tsc2nsec Convert the current clock to nanoseconds, using tsc multiplier and shift from the Linux" " kernel's perf interface\n" " --tsync-interval set the loop interval, in ms, for timestamps synchronization with guests:" " If a negative number is specified, timestamps synchronization is disabled" " If 0 is specified, no loop is performed - timestamps offset is calculated only twice," " at the beginnig and at the end of the trace\n" " --poll don't block while reading from the trace buffer\n" " --name used with -A to give the agent a specific name\n" " --file-version set the desired trace file version\n" " --compression compress the trace output file, one of these strings can be passed:\n" " any - auto select the best available compression algorithm\n" " none - do not compress the trace file\n" " name - the name of the desired compression algorithms\n" " available algorithms can be listed with trace-cmd list -c\n" " --proxy vsocket to reach the agent. Acts the same as -A (for an agent)\n" " but will send the proxy connection to the agent.\n" " --daemonize run trace-cmd in the background as a daemon after recording has started.\n" " creates a pidfile at /var/run/trace-cmd-record.pid with the pid of trace-cmd\n" " during the recording.\n" }, { "set", "set a ftrace configuration parameter", " %s set [-v][-e event [-f filter]][-p plugin][-F][-d][-D] \\\n" " [-q][-s usecs][-O option ][-l func][-g func][-n func] \\\n" " [-P pid][-b size][-B buf][-m max][-C clock][command ...]\n" " -e enable event\n" " -f filter for previous -e event\n" " -R trigger for previous -e event\n" " -p set ftrace plugin\n" " -P set PIDs to be traced\n" " -c also trace the children of -F (or -P if kernel supports it)\n" " -C set the trace clock\n" " -T do a stacktrace on all events\n" " -l filter function name\n" " -g set graph function\n" " -n do not trace function\n" " -m max size per CPU in kilobytes\n" " -M set CPU mask to trace\n" " -v will negate all -e (disable those 
events) and -B (delete those instances) after it\n" " -d disable function tracer when running\n" " -D Full disable of function tracing (for all users)\n" " -O option to enable (or disable)\n" " -b change kernel buffersize (in kilobytes per CPU)\n" " -B create sub buffer and following events will be enabled here\n" " -i do not fail if an event is not found\n" " -q print no output to the screen\n" " --quiet print no output to the screen\n" " --module filter module name\n" " --func-stack perform a stack trace for function tracer\n" " (use with caution)\n" " --max-graph-depth limit function_graph depth\n" " --cmdlines-size change kernel saved_cmdlines_size\n" " --user execute the specified [command ...] as given user\n" " --fork return immediately if a command is specified\n" " --verbose 'level' Set the desired log level\n" }, { "start", "start tracing without recording into a file", " %s start [-e event][-p plugin][-d][-O option ][-P pid]\n" " Uses same options as record.\n" " It only enables the tracing and exits\n" "\n" " --fork: If a command is specified, then return right after it forks\n" " --verbose 'level' Set the desired log level\n" }, { "extract", "extract a trace from the kernel", " %s extract [-p plugin][-O option][-o file][-B buf][-s][-a][-t]\n" " Uses similar options as record, but only reads an existing trace.\n" " -s : extract the snapshot instead of the main buffer\n" " -B : extract a given buffer (more than one may be specified)\n" " -a : extract all buffers (except top one)\n" " -t : extract the top level buffer (useful with -B and -a)\n" " --verbose 'level' Set the desired log level\n" }, { "stop", "stop the kernel from recording trace data", " %s stop [-B buf [-B buf]..] 
[-a] [-t]\n" " Stops the tracer from recording more data.\n" " Used in conjunction with start\n" " -B stop a given buffer (more than one may be specified)\n" " -a stop all buffers (except top one)\n" " -t stop the top level buffer (useful with -B or -a)\n" }, { "restart", "restart the kernel trace data recording", " %s restart [-B buf [-B buf]..] [-a] [-t]\n" " Restarts recording after a trace-cmd stop.\n" " Used in conjunction with stop\n" " -B restart a given buffer (more than one may be specified)\n" " -a restart all buffers (except top one)\n" " -t restart the top level buffer (useful with -B or -a)\n" }, { "show", "show the contents of the kernel tracing buffer", " %s show [-p|-s][-c cpu][-B buf][options]\n" " Basically, this is a cat of the trace file.\n" " -p read the trace_pipe file instead\n" " -s read the snapshot file instance\n" " (Can't have both -p and -s)\n" " -c just show the file associated with a given CPU\n" " -B read from a tracing buffer instance.\n" " -f display the file path that is being dumped\n" " The following options shows the corresponding file name\n" " and then exits.\n" " --tracing_on\n" " --current_tracer\n" " --buffer_size (for buffer_size_kb)\n" " --buffer_total_size (for buffer_total_size_kb)\n" " --buffer_subbuf_size (for buffer_subbuf_size_kb)\n" " --buffer_percent (for buffer_percent)\n" " --ftrace_filter (for set_ftrace_filter)\n" " --ftrace_notrace (for set_ftrace_notrace)\n" " --ftrace_pid (for set_ftrace_pid)\n" " --graph_function (for set_graph_function)\n" " --graph_notrace (for set_graph_notrace)\n" " --cpumask (for tracing_cpumask)\n" }, { "reset", "disable all kernel tracing and clear the trace buffers", " %s reset [-b size][-B buf][-a][-d][-t]\n" " Disables the tracer (may reset trace file)\n" " Used in conjunction with start\n" " -b change the kernel buffer size (in kilobytes per CPU)\n" " -d delete the previous specified instance\n" " -B reset the given buffer instance (may specify multiple -B)\n" " -a reset all 
instances (except top one)\n" " -t reset the top level instance (useful with -B or -a)\n" }, { "clear", "clear the trace buffers", " %s clear [-B buf][-a]\n" " -B clear the given buffer (may specify multiple -B)\n" " -a clear all existing buffers, including the top level one\n" }, { "report", "read out the trace stored in a trace.dat file", " %s report [-i file] [--cpu cpu] [-e][-f][-l][-P][-L][-N][-R][-E]\\\n" " [-r events][-n events][-F filter][-v][-V[1-6]][-T][-O option]\n" " [-H [start_system:]start_event,start_match[,pid]/[end_system:]end_event,end_match[,flags]\n" " [-G]\n" " -i input file [default trace.dat]\n" " -e show file endianess\n" " -f show function mapping list\n" " -P show printk list\n" " -E show event files stored\n" " -F filter to filter output on\n" " -I filter out events with the HARDIRQ flag set\n" " -S filter out events with the SOFTIRQ flag set\n" " -t print out full timestamp. Do not truncate to 6 places.\n" " -R raw format: ignore print format and only show field data\n" " -r raw format the events that match the option\n" " -v will negate all -F after it (Not show matches)\n" " -T print out the filter strings created and exit\n" " -V[level] verbose (shows plugins being loaded)\n" " With optional level (see --verbose numbers)\n" " -L load only local (~/.trace-cmd/plugins) plugins\n" " -N do not load any plugins\n" " -n ignore plugin handlers for events that match the option\n" " -w show wakeup latencies\n" " -l show latency format (default with latency tracers)\n" " -O plugin option -O [plugin:]var[=val]\n" " --cpu - filter events according to the given cpu list.\n" " A range of CPUs can be specified using 'cpuX-cpuY' notation.\n" " --cpus - List the CPUs that have content in it then exit.\n" " --first-event - Show the timestamp of the first event for all CPUs.\n" " --last-event - Show the timestamp of the last event for all CPUs.\n" " --check-events return whether all event formats can be parsed\n" " --stat - show the buffer stats that 
were reported at the end of the record.\n" " --uname - show uname of the record, if it was saved\n" " --version - show version used to build the trace-cmd exec that created the file\n" " --profile report stats on where tasks are blocked and such\n" " -G when profiling, set soft and hard irqs as global\n" " -H Allows users to hook two events together for timings\n" " (used with --profile)\n" " --by-comm used with --profile, merge events for related comms\n" " --ts-offset will add amount to timestamp of all events of the\n" " previous data file.\n" " --ts2secs HZ, pass in the timestamp frequency (per second)\n" " to convert the displayed timestamps to seconds\n" " Affects the previous data file, unless there was no\n" " previous data file, in which case it becomes default\n" " --ts-diff Show the delta timestamp between events.\n" " --ts-check Check to make sure no time stamp on any CPU goes backwards.\n" " --nodate Ignore the --date processing of trace-cmd record.\n" " --raw-ts Display raw timestamps, without any corrections.\n" " --align-ts Display timestamps aligned to the first event.\n" " --verbose[=level] Set the desired log level\n" " 0 or none - no error messages\n" " 1 or crit - only critical messages\n" " 2 or err - 'crit' and error messages\n" " 3 or warn - 'err' and warning messages\n" " 4 or info - 'warn' and informational messages\n" " 5 or debug - 'info' and debugging messages\n" " 6 or all - same as debug\n" }, { "stream", "Start tracing and read the output directly", " %s stream [-e event][-p plugin][-d][-O option ][-P pid]\n" " Uses same options as record but does not write to files or the network.\n" " --verbose 'level' Set the desired log level\n" }, { "profile", "Start profiling and read the output directly", " %s profile [-e event][-p plugin][-d][-O option ][-P pid][-G][-S][-o output]\n" " [-H [start_system:]start_event,start_match[,pid]/[end_system:]end_event,end_match[,flags]\n\n" " Uses same options as record --profile.\n" " -H Allows users to 
hook two events together for timings\n" " --verbose 'level' Set the desired log level\n" }, { "hist", "show a histogram of the trace.dat information", " %s hist [-i file][-P] [file]" " -P ignore pids (compact all functions)\n" }, { "stat", "show the status of the running tracing (ftrace) system", " %s stat [-B buf][-t][-o]" " -B show the status of a instance buffer\n" " -t show the top level status along with buffer specified by -B\n" " -o list tracing options\n" }, { "split", "parse a trace.dat file into smaller file(s)", " %s split [options] -o file [start [end]]\n" " -o output file to write to (file.1, file.2, etc)\n" " -s n split file up by n seconds\n" " -m n split file up by n milliseconds\n" " -u n split file up by n microseconds\n" " -e n split file up by n events\n" " -p n split file up by n pages\n" " -C n select CPU n\n" " -B buffer keep buffer in resulting .dat file\n" " Use -t to promote the buffer to the top instance.\n" " -t promote preceding buffer to the top instance.\n" " Must follow -B.\n" " --top keep top buffer in resulting .dat file.\n" " -b new name of the top instance. 
Must follow --top.\n" " -r repeat from start to end\n" " -c per cpu, that is -p 2 will be 2 pages for each CPU\n" " if option is specified, it will split the file\n" " up starting at start, and ending at end\n" " start - decimal start time in seconds (ex: 75678.923853)\n" " if left out, will start at beginning of file\n" " end - decimal end time in seconds\n" }, { "options", "list the plugin options available for trace-cmd report", " %s options\n" }, { "listen", "listen on a network socket for trace clients", " %s listen -p port[-D][-o file][-d dir][-l logfile]\n" " Creates a socket to listen for clients.\n" " -p port number to listen on.\n" " -D run in daemon mode.\n" " -V listen on a vsocket instead.\n" " -o file name to use for clients.\n" " -d directory to store client files.\n" " -l logfile to write messages to.\n" " --verbose 'level' Set the desired log level\n" }, { "agent", "listen on a vsocket for trace clients", " %s agent -p port[-D][-N IP][-P cid]\n" " Creates a vsocket to listen for clients.\n" " -N Connect to IP via TCP instead of vsockets\n" " *** Insecure setting, only use on a trusted network ***\n" " *** Only use if the client is totally trusted. 
***\n" " -p port number to listen on.\n" " -D run in daemon mode.\n" " -P Also act as a proxy server, with a single client denoted\n" " by a context ID (cid).\n" " --verbose 'level' Set the desired log level\n" }, { "setup-guest", "create FIFOs for tracing guest VMs", " %s setup-guest [-c cpus][-p perm][-g group][-a] guest\n" " -c number of guest virtual CPUs\n" " -p FIFOs permissions (default: 0660)\n" " -g FIFOs group owner\n" " -a Attach FIFOs to guest VM config\n" }, { "list", "list the available events, plugins or options", " %s list [-e [regex]][-t][-o][-f [regex]]\n" " -e list available events\n" " -F show event format\n" " --full show the print fmt with -F\n" " -R show event triggers\n" " -l show event filters\n" " -t list available tracers\n" " -o list available options\n" " -f [regex] list available functions to filter on\n" " -P list loaded plugin files (by path)\n" " -O list plugin options\n" " -B list defined buffer instances\n" " -C list the defined clocks (and active one)\n" " -c list the supported trace file compression algorithms\n" }, { "restore", "restore a crashed record", " %s restore [-c][-o file][-i file] cpu-file [cpu-file ...]\n" " -c create a partial trace.dat file only\n" " -o output file\n" " -i partial trace.dat file for input\n" }, { "snapshot", "take snapshot of running trace", " %s snapshot [-s][-r][-f][-B buf][-c cpu]\n" " -s take a snapshot of the trace buffer\n" " -r reset current snapshot\n" " -f free the snapshot buffer\n" " without the above three options, display snapshot\n" " -c operate on the snapshot buffer for the given CPU\n" " -B operate on the snapshot buffer for a tracing buffer instance.\n" }, { "stack", "output, enable or disable kernel stack tracing", " %s stack [--start][--stop][--reset]\n" " --start enable the stack tracer\n" " --stop disable the stack tracer\n" " --reset reset the maximum stack found\n" " --verbose 'level' Set the desired log level\n" }, { "check-events", "parse trace event formats", " %s 
check-events [-N]\n" " -N do not load any plugins\n" " --verbose 'level' Set the desired log level\n" }, { "dump", "read out the meta data from a trace file", " %s dump [options]\n" " -i input file, default is trace.dat\n" " -v validate a trace file\n" " --all print all meta data from a trace file\n" " --summary print a meta data summary\n" " --head-page print header page information\n" " --head-event print header event information\n" " --ftrace-events print ftrace events format\n" " --systems print recorded event systems\n" " --events print format of recorded events\n" " --kallsyms print information of the mapping of function addresses to the function names\n" " --printk print trace_printk() format strings\n" " --cmd-lines print information mapping a PID to a process name\n" " --options print options\n" " --flyrecord information of offset and count of recorded events per CPU\n" " --clock trace clock, saved in the file\n" " -h, --help show usage information\n" " --verbose 'level' Set the desired log level\n" }, { "attach", "Attach a host and guest trace.dat file", " %s attach [options] host_file guest_file vcpu_pid,...\n" " -s offset,scale,fraction[,timestamp] conversion to sync guest timestamp\n" " host_file The trace.dat file from the host\n" " guest_file The trace.dat file from the guest\n" " vcpu_pid list of process ids from the host that represent the vCPUs of the guest\n" }, { "convert", "convert trace file to different version", " %s convert [options]\n" " -i input file, default is trace.dat\n" " -o output file, mandatory parameter.\n" " The output file can be specified also as last argument of the command\n" " --file-version set the desired trace file version\n" " --compression compress the trace output file, one of these strings can be passed:\n" " any - auto select the best available compression algorithm\n" " none - do not compress the trace file\n" " name - the name of the desired compression algorithms\n" " available algorithms can be listed with 
trace-cmd list -c\n" }, { "sqlhist", "Run a SQL like query to create histogram or synthetic events (see man tracefs_sql(3))\n", "%s sql [-n name][-e][-s][-S fields][-m var][-c var][-T][-t dir][-f file | 'sql-command-line']\n" " -n name - name of synthetic event 'Anonymous' if left off\n" " -t dir - use dir instead of /sys/kernel/tracing\n" " -e - execute the commands to create the synthetic event\n" " -m - trigger the action when var is a new max.\n" " -c - trigger the action when var changes.\n" " -s - used with -m or -c to do a snapshot of the tracing buffer\n" " -S - used with -m or -c to save fields of the end event (comma deliminated)\n" " -T - used with -m or -c to do both a snapshot and a trace\n" " -f file - read sql lines from file otherwise from the command line\n" " if file is '-' then read from standard input.\n\n" " See man tracefs_sql(3) for sql-command-line\n" }, { NULL, NULL, NULL } }; static struct usage_help *find_help(char *cmd) { struct usage_help *help; help = usage_help; while (help->name) { if (strcmp(cmd, help->name) == 0) return help; help++; } return NULL; } void usage(char **argv) { struct usage_help *help = NULL; char *arg = argv[0]; char *p; p = basename(arg); printf("\n" "%s version %s (%s)\n\n" "usage:\n", p, VERSION_STRING, VERSION_GIT); if (argv[1]) help = find_help(argv[1]); if (help) { printf(help->long_help, p); goto out; } printf(" %s [COMMAND] ...\n\n" " commands:\n", p); help = usage_help; while (help->name) { printf(" %s - %s\n", help->name, help->short_help); help++; } out: printf("\n"); exit(-1); } void trace_usage(int argc, char **argv) { usage(argv); } trace-cmd-v3.3.1/tracecmd/trace-vm.c000066400000000000000000000246661470231550600172300ustar00rootroot00000000000000// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2008, 2009, 2010 Red Hat Inc, Steven Rostedt * Copyright (C) 2020, VMware, Tzvetomir Stoyanov * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ #include #include #include 
#include #include #include #include #include "trace-local.h" #include "trace-msg.h" static struct trace_guest *guests; static size_t guests_len; static struct trace_guest *get_guest_by_cid(unsigned int guest_cid) { int i; if (!guests) return NULL; for (i = 0; i < guests_len; i++) if (guest_cid == guests[i].cid) return guests + i; return NULL; } static struct trace_guest *get_guest_by_name(const char *name) { int i; if (!guests || !strlen(name)) return NULL; for (i = 0; i < guests_len; i++) if (strcmp(name, guests[i].name) == 0) return guests + i; return NULL; } bool trace_have_guests_pid(void) { for (int i = 0; i < guests_len; i++) { if (guests[i].pid < 0) return false; } return true; } /* Find all the tasks associated with the guest pid */ static void find_tasks(struct trace_guest *guest) { struct dirent *dent; char *path; DIR *dir; int ret; int tasks = 0; ret = asprintf(&path, "/proc/%d/task", guest->pid); if (ret < 0) return; dir = opendir(path); free(path); if (!dir) return; while ((dent = readdir(dir))) { int *pids; if (!(dent->d_type == DT_DIR && is_digits(dent->d_name))) continue; pids = realloc(guest->task_pids, sizeof(int) * (tasks + 2)); if (!pids) break; pids[tasks++] = strtol(dent->d_name, NULL, 0); pids[tasks] = -1; guest->task_pids = pids; } closedir(dir); } static void find_pid_by_cid(struct trace_guest *guest); static struct trace_guest *add_guest(unsigned int cid, const char *name) { struct trace_guest *guest; guests = realloc(guests, (guests_len + 1) * sizeof(*guests)); if (!guests) die("allocating new guest"); guest = &guests[guests_len++]; memset(guest, 0, sizeof(*guest)); guest->name = strdup(name); if (!guest->name) die("allocating guest name"); guest->cid = cid; guest->pid = -1; find_pid_by_cid(guest); find_tasks(guest); return guest; } static struct tracefs_instance *start_trace_connect(void) { struct tracefs_instance *open_instance; open_instance = tracefs_instance_create("vsock_find_pid"); if (!open_instance) return NULL; 
/* Node of a singly linked list of PIDs */
struct pids {
	struct pids		*next;
	int			pid;
};

/* State shared with the event iteration callback */
struct trace_fields {
	struct tep_event		*sched_waking;
	struct tep_event		*kvm_exit;
	struct tep_format_field		*common_pid;
	struct tep_format_field		*sched_next;
	struct pids			*pids;
	int				found_pid;
};

/* Release every node of the list starting at @pids (NULL is fine). */
static void free_pids(struct pids *pids)
{
	while (pids) {
		struct pids *victim = pids;

		pids = victim->next;
		free(victim);
	}
}

/*
 * Push @pid onto the front of the list *@pids.
 * The list is left untouched if the node cannot be allocated.
 */
static void add_pid(struct pids **pids, int pid)
{
	struct pids *node = malloc(sizeof(*node));

	if (!node)
		return;

	node->next = *pids;
	node->pid = pid;
	*pids = node;
}

/* Return true if @pid is stored somewhere in the list @pids. */
static bool match_pid(struct pids *pids, int pid)
{
	for (; pids; pids = pids->next) {
		if (pids->pid == pid)
			return true;
	}

	return false;
}
/*
 * find_tgid - look up the thread group id of a task
 * @pid: The task id to look up
 *
 * Scans /proc/<pid>/status for the line starting with "Tgid:" and
 * parses the number that follows it.
 *
 * Returns the thread group id, or -1 if the status file cannot be
 * opened or does not hold a parsable "Tgid:" line.
 */
static int find_tgid(int pid)
{
	static const char key[] = "Tgid:";
	const size_t key_len = sizeof(key) - 1;
	char *line = NULL;
	char path[64];
	size_t cap = 0;
	int tgid = -1;
	FILE *fp;

	/* "/proc/" + at most 11 characters of int + "/status" fits easily */
	snprintf(path, sizeof(path), "/proc/%d/status", pid);

	fp = fopen(path, "r");
	if (!fp)
		return -1;

	while (getline(&line, &cap, fp) > 0) {
		char *val;
		char *end;
		long num;

		if (strncmp(line, key, key_len) != 0)
			continue;

		val = line + key_len;
		/* <ctype.h> functions require an unsigned char value */
		while (isspace((unsigned char)*val))
			val++;

		/* Only accept the value if at least one digit was parsed */
		num = strtol(val, &end, 10);
		if (end != val)
			tgid = (int)num;
		break;
	}

	free(line);
	fclose(fp);

	return tgid;
}
/*
 * In order to find the guest that is associated to the given cid,
 * trace the sched_waking and kvm_exit events, connect to the cid
 * (doesn't matter what port, use -1 to not connect to anything)
 * and find what task gets woken up from this code and calls kvm_exit,
 * then that is the task that is running the guest.
 * Then look at the /proc/<pid>/status file to find the task group
 * id (Tgid), and this is the PID of the task running all the threads.
 *
 * The result of stop_trace_connect() is stored in guest->pid.
 */
static void find_pid_by_cid(struct trace_guest *guest)
{
	struct tracefs_instance *instance;
	int fd;

	/* The connection attempt must happen while the events are traced */
	instance = start_trace_connect();
	fd = trace_vsock_open(guest->cid, -1);
	guest->pid = stop_trace_connect(instance);
	/* Just in case! */
	if (fd >= 0)
		close(fd);
}
*/ void trace_add_guest_info(struct tracecmd_output *handle, struct buffer_instance *instance) { unsigned long long trace_id; struct trace_guest *guest; const char *name; char *buf, *p; int cpus; int size; int pid; int i; if (is_network(instance)) { name = instance->name; cpus = instance->cpu_count; trace_id = instance->trace_id; } else { guest = trace_get_guest(instance->cid, NULL); if (!guest) return; cpus = guest->cpu_max; name = guest->name; /* * If this is a proxy, the trace_id of the guest is * in the guest descriptor (added in trace_tsync_as_host(). */ if (guest->trace_id) trace_id = guest->trace_id; else trace_id = instance->trace_id; } size = strlen(name) + 1; size += sizeof(long long); /* trace_id */ size += sizeof(int); /* cpu count */ size += cpus * 2 * sizeof(int); /* cpu,pid pair */ buf = calloc(1, size); if (!buf) return; p = buf; strcpy(p, name); p += strlen(name) + 1; memcpy(p, &trace_id, sizeof(long long)); p += sizeof(long long); memcpy(p, &cpus, sizeof(int)); p += sizeof(int); for (i = 0; i < cpus; i++) { if (is_network(instance)) pid = -1; else pid = guest->cpu_pid[i]; memcpy(p, &i, sizeof(int)); p += sizeof(int); memcpy(p, &pid, sizeof(int)); p += sizeof(int); } tracecmd_add_option(handle, TRACECMD_OPTION_GUEST, size, buf); free(buf); } trace-cmd-v3.3.1/tracecmd/trace-vsock.c000066400000000000000000000065721470231550600177270ustar00rootroot00000000000000#include #include #include #include #include #include "trace-cmd-private.h" int __hidden trace_vsock_open(unsigned int cid, unsigned int port) { struct sockaddr_vm addr = { .svm_family = AF_VSOCK, .svm_cid = cid, .svm_port = port, }; int sd; sd = socket(AF_VSOCK, SOCK_STREAM, 0); if (sd < 0) return -errno; if (connect(sd, (struct sockaddr *)&addr, sizeof(addr))) { close(sd); return -errno; } return sd; } int __hidden trace_vsock_make(unsigned int port) { struct sockaddr_vm addr = { .svm_family = AF_VSOCK, .svm_cid = VMADDR_CID_ANY, .svm_port = port, }; int sd; sd = socket(AF_VSOCK, SOCK_STREAM, 
0); if (sd < 0) return -errno; setsockopt(sd, SOL_SOCKET, SO_REUSEADDR, &(int){1}, sizeof(int)); if (bind(sd, (struct sockaddr *)&addr, sizeof(addr))) goto error; if (listen(sd, SOMAXCONN)) goto error; return sd; error: close(sd); return -errno; } int __hidden trace_vsock_make_any(void) { return trace_vsock_make(VMADDR_PORT_ANY); } int __hidden trace_vsock_get_port(int sd, unsigned int *port) { struct sockaddr_vm addr; socklen_t addr_len = sizeof(addr); if (getsockname(sd, (struct sockaddr *)&addr, &addr_len)) return -errno; if (addr.svm_family != AF_VSOCK) return -EINVAL; if (port) *port = addr.svm_port; return 0; } int get_vsocket_params(int fd, unsigned int *lcid, unsigned int *rcid) { struct sockaddr_vm addr; socklen_t addr_len = sizeof(addr); if (lcid) { memset(&addr, 0, sizeof(addr)); if (getsockname(fd, (struct sockaddr *)&addr, &addr_len)) return -1; if (addr.svm_family != AF_VSOCK) return -1; *lcid = addr.svm_cid; } if (rcid) { memset(&addr, 0, sizeof(addr)); addr_len = sizeof(addr); if (getpeername(fd, (struct sockaddr *)&addr, &addr_len)) return -1; if (addr.svm_family != AF_VSOCK) return -1; *rcid = addr.svm_cid; } return 0; } int trace_vsock_print_connection(int fd) { struct sockaddr_vm vm_addr; socklen_t addr_len; int cid, port; addr_len = sizeof(vm_addr); if (getpeername(fd, (struct sockaddr *)&vm_addr, &addr_len)) return -1; if (vm_addr.svm_family != AF_VSOCK) return -1; cid = vm_addr.svm_cid; port = vm_addr.svm_port; if (tracecmd_get_debug()) tracecmd_debug("Connected to @%u:%u fd:%d\n", cid, port, fd); else tracecmd_plog("Connected to @%u:%u\n", cid, port); return 0; } static int try_splice_read_vsock(void) { int ret, sd, brass[2]; sd = socket(AF_VSOCK, SOCK_STREAM, 0); if (sd < 0) return -errno; ret = pipe(brass); if (ret < 0) goto out_close_sd; /* * On kernels that don't support splice reading from vsockets * this will fail with EINVAL, or ENOTCONN otherwise. 
* Technically, it should never succeed but if it does, claim splice * reading is supported. */ ret = splice(sd, NULL, brass[1], NULL, 10, 0); if (ret < 0) ret = errno != EINVAL; else ret = 1; close(brass[0]); close(brass[1]); out_close_sd: close(sd); return ret; } bool __hidden trace_vsock_can_splice_read(void) { static bool initialized, res; if (initialized) return res; res = try_splice_read_vsock() > 0; initialized = true; return res; } #define GET_LOCAL_CID 0x7b9 int __hidden trace_vsock_local_cid(void) { int cid; int fd; fd = open("/dev/vsock", O_RDONLY); if (fd < 0) return -errno; if (ioctl(fd, GET_LOCAL_CID, &cid)) cid = -errno; close(fd); return cid; } trace-cmd-v3.3.1/utest/000077500000000000000000000000001470231550600147125ustar00rootroot00000000000000trace-cmd-v3.3.1/utest/Makefile000066400000000000000000000023641470231550600163570ustar00rootroot00000000000000# SPDX-License-Identifier: GPL-2.0 include $(src)/scripts/utils.mk bdir:=$(obj)/utest TARGETS = $(bdir)/trace-utest OBJS = OBJS += trace-utest.o OBJS += tracecmd-utest.o LIBS += $(LIBTRACECMD_STATIC) -lcunit $(LIBTRACEEVENT_LDLAGS) $(LIBTRACEFS_LDLAGS) LIBS += $(ZLIB_LDLAGS) $(LIBZSTD_LDLAGS) OBJS := $(OBJS:%.o=$(bdir)/%.o) DEPS := $(OBJS:$(bdir)/%.o=$(bdir)/.%.d) VALGRIND = $(shell which valgrind) $(bdir): @mkdir -p $(bdir) $(OBJS): | $(bdir) $(DEPS): | $(bdir) $(bdir)/trace-utest: $(OBJS) $(LIBTRACECMD_STATIC) $(Q)$(do_app_build) $(bdir)/%.o: %.c $(Q)$(call do_fpic_compile) $(DEPS): $(bdir)/.%.d: %.c $(Q)$(CC) -M $(CPPFLAGS) $(CFLAGS) $< > $@ $(Q)$(CC) -M -MT $(bdir)/$*.o $(CPPFLAGS) $(CFLAGS) $< > $@ $(OBJS): $(bdir)/%.o : $(bdir)/.%.d dep_includes := $(wildcard $(DEPS)) test: $(TARGETS) test_mem: test ifeq (, $(VALGRIND)) $(error "No valgrind in $(PATH), cannot run memory test") endif ifneq ($(shell id -u), 0) $(error "The memory test should be run as root, as it reuqires full access to tracefs") endif CK_FORK=no $(VALGRIND) \ --show-leak-kinds=all --leak-resolution=high \ --leak-check=full 
/*
 * Print the command line help of the unit test program.
 * Never returns: the process is terminated with exit status 0.
 */
static void print_help(char **argv)
{
	printf("Usage: %s [OPTIONS]\n", basename(argv[0]));
	printf("\t-s, --silent\tPrint test summary\n");
	printf("\t-r, --run test\tRun specific test:\n");
	printf("\t\t trace-cmd run trace-cmd tests\n");
	/* -v is accepted by main() but was missing from the help text */
	printf("\t-v, --verbose\tShow the output of the tests\n");
	printf("\t-h, --help\tPrint usage information\n");
	exit(0);
}
char *opts = "+hsr:v"; static struct option long_options[] = { {"silent", no_argument, NULL, 's'}, {"run", required_argument, NULL, 'r'}, {"verbose", no_argument, NULL, 'v'}, {"help", no_argument, NULL, 'h'}, {NULL, 0, NULL, 0} }; c = getopt_long (argc, argv, opts, long_options, &index); if (c == -1) break; switch (c) { case 'r': if (strcmp(optarg, "trace-cmd") == 0) tests |= RUN_TRACECMD; else print_help(argv); break; case 's': verbose = CU_BRM_SILENT; break; case 'v': show_output = true; break; case 'h': default: print_help(argv); break; } } if (tests == RUN_NONE) tests = RUN_ALL; if (CU_initialize_registry() != CUE_SUCCESS) { printf("Test registry cannot be initialized\n"); return -1; } if (tests & RUN_TRACECMD) test_tracecmd_lib(); CU_basic_set_mode(verbose); CU_basic_run_tests(); CU_cleanup_registry(); return 0; } trace-cmd-v3.3.1/utest/trace-utest.h000066400000000000000000000004631470231550600173260ustar00rootroot00000000000000/* SPDX-License-Identifier: LGPL-2.1 */ /* * Copyright (C) 2020, VMware, Tzvetomir Stoyanov * */ #ifndef _TRACE_UTEST_H_ #define _TRACE_UTEST_H_ #include extern const char *argv0; extern bool show_output; void test_tracecmd_lib(void); #endif /* _TRACE_UTEST_H_ */ trace-cmd-v3.3.1/utest/tracecmd-utest.c000066400000000000000000000324671470231550600200160ustar00rootroot00000000000000// SPDX-License-Identifier: LGPL-2.1 /* * Copyright (C) 2020, VMware, Tzvetomir Stoyanov * */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "trace-utest.h" #ifndef PATH_MAX #define PATH_MAX 1024 #endif static char tracecmd_exec[PATH_MAX]; #define TRACECMD_SUITE "trace-cmd" #define TRACECMD_FILE "__trace_test__.dat" #define TRACECMD_FILE2 "__trace_test__2.dat" #define TRACECMD_OUT "-o", TRACECMD_FILE #define TRACECMD_OUT2 "-o", TRACECMD_FILE2 #define TRACECMD_IN "-i", TRACECMD_FILE #define TRACECMD_IN2 "-i", TRACECMD_FILE2 #define TRACECMD_SQL_HIST "SELECT irq FROM 
/*
 * Wait for the child @pid to terminate.
 *
 * Returns 0 only if the child exited normally with exit status 0.
 * Returns -1 if waiting failed, if the child exited with a non-zero
 * status, or if it did not exit normally (e.g. killed by a signal).
 */
static int wait_for_exec(int pid)
{
	int status;
	int ret;

	ret = waitpid(pid, &status, 0);
	if (ret < 0 || ret != pid)
		return -1;

	/*
	 * WEXITSTATUS() is only meaningful for a normal exit. Without
	 * this check a child killed by a signal would look like success.
	 */
	if (!WIFEXITED(status))
		return -1;

	return WEXITSTATUS(status) ? -1 : 0;
}
{ char **argv; va_list ap; int ret = -1; pid_t pid; va_start(ap, cmd); argv = get_args(cmd, ap); va_end(ap); if (!argv) return -1; pid = fork(); if (pid < 0) goto out; if (!pid) { if (!show_output) silent_output(); ret = execvp(tracecmd_exec, argv); exit (ret); } ret = wait_for_exec(pid); out: tracefs_list_free(argv); return ret; } static int pipe_it(int *ofd, int *efd, int (*func)(void *), void *data) { int obrass[2]; int ebrass[2]; pid_t pid; int ret; if (pipe(obrass) < 0) return -1; if (pipe(ebrass) < 0) goto fail_out; pid = fork(); if (pid < 0) goto fail; if (!pid) { char shret[32]; close(obrass[0]); close(STDOUT_FILENO); if (dup2(obrass[1], STDOUT_FILENO) < 0) exit(-1); close(ebrass[0]); close(STDERR_FILENO); if (dup2(obrass[1], STDERR_FILENO) < 0) exit(-1); ret = func(data); /* * valgrind triggers its reports when the application * exits. If the application does a fork() and the child * exits, it will still trigger the valgrind report for * all the allocations that were not freed by the parent. * * To prevent valgrind from triggering, do an execl() on * a basic shell that will simply exit with the return value. * This will quiet valgrind from reporting memory that has * been allocated by the parent up to here. 
*/ snprintf(shret, 32, "exit %d", ret); execl("/usr/bin/sh", "/usr/bin/sh", "-c", shret, NULL); execl("/bin/sh", "/bin/sh", "-c", shret, NULL); /* If the above execl() fails, simply do an exit */ exit(ret); } close(obrass[1]); close(ebrass[1]); *ofd = obrass[0]; *efd = ebrass[0]; return pid; fail: close(ebrass[0]); close(ebrass[1]); fail_out: close(obrass[0]); close(obrass[1]); return -1; } struct do_grep { const char *cmd; va_list *ap; }; static int do_grep(void *data) { struct do_grep *gdata = data; char **argv; int ret; argv = get_args(gdata->cmd, *gdata->ap); if (!argv) exit(-1); ret = execvp(tracecmd_exec, argv); tracefs_list_free(argv); return ret; } struct do_grep_it { const char *match; const char *cmd; va_list *ap; }; static int do_grep_it(void *data) { struct do_grep_it *dgdata = data; struct do_grep gdata; FILE *fp; regex_t reg; char *buf = NULL; ssize_t n; size_t l = 0; int ofd; int efd; int pid; int ret; if (regcomp(®, dgdata->match, REG_ICASE|REG_NOSUB)) return -1; gdata.cmd = dgdata->cmd; gdata.ap = dgdata->ap; pid = pipe_it(&ofd, &efd, do_grep, &gdata); if (pid < 0) { regfree(®); return -1; } fp = fdopen(ofd, "r"); if (!fp) goto out; do { n = getline(&buf, &l, fp); if (n > 0 && regexec(®, buf, 0, NULL, 0) == 0) printf("%s", buf); } while (n >= 0); free(buf); out: ret = wait_for_exec(pid); if (fp) fclose(fp); else perror("fp"); close(ofd); close(efd); regfree(®); return ret > 0 ? 0 : ret; } struct do_grep_match { const char *match; const char *cmd; va_list *ap; }; static int grep_match(const char *match, const char *cmd, ...) 
{ struct do_grep_it gdata; FILE *fp; va_list ap; char *buf = NULL; ssize_t n; size_t l = 0; bool found = false; int ofd; int efd; int pid; int ret; va_start(ap, cmd); gdata.match = match; gdata.cmd = cmd; gdata.ap = ≈ pid = pipe_it(&ofd, &efd, do_grep_it, &gdata); va_end(ap); if (pid < 0) return -1; fp = fdopen(ofd, "r"); if (!fp) goto out; do { n = getline(&buf, &l, fp); if (n > 0) { if (show_output) printf("%s", buf); found = true; } } while (n >= 0); free(buf); out: ret = wait_for_exec(pid); if (ret) n = 1; if (fp) fclose(fp); else { perror("fp"); close(ofd); } close(efd); return found ? 0 : 1; } static void test_trace_record_report(void) { int ret; ret = run_trace("record", TRACECMD_OUT, "-e", "sched", "sleep", "1", NULL); CU_TEST(ret == 0); ret = run_trace("convert", "--file-version", "6", TRACECMD_IN, TRACECMD_OUT2, NULL); CU_TEST(ret == 0); } static void test_trace_sqlhist_hist(void) { int ret; ret = run_trace("sqlhist", "-e", TRACECMD_SQL_HIST, NULL); CU_TEST(ret == 0); ret = grep_match(" *Hits: [0-9][0-9]*", TRACECMD_SQL_READ_HIST, NULL); CU_TEST(ret == 0); ret = run_trace("sqlhist", TRACECMD_SQL_SYNTH, NULL); CU_TEST(ret == 0); ret = run_trace(TRACECMD_SQL_START_SYNTH, NULL); CU_TEST(ret == 0); sleep(1); ret = grep_match(SYNTH_EVENT ":", "show", NULL); CU_TEST(ret == 0); tracefs_instance_reset(NULL); } static int read_stats(const char *out, const char *match, const char *cmd, ...) 
{ struct do_grep_it gdata; FILE *fp; va_list ap; bool found = false; char *buf = NULL; char *p; ssize_t n; size_t l = 0; int ofd; int efd; int pid; int ret; int val; va_start(ap, cmd); gdata.match = match; gdata.cmd = cmd; gdata.ap = ≈ pid = pipe_it(&ofd, &efd, do_grep_it, &gdata); va_end(ap); if (pid < 0) return -1; fp = fdopen(ofd, "r"); if (!fp) goto out; do { n = getline(&buf, &l, fp); if (n > 0) { for (p = buf; isspace(*p); p++) ; val = atoi(p); found = true; if (show_output) printf("%s", buf); CU_TEST(val < 10000000); } } while (n >= 0); free(buf); out: ret = wait_for_exec(pid); if (fp) fclose(fp); else { perror("fp"); } if (!found) ret = -1; close(ofd); close(efd); return ret > 0 ? 0 : ret; } static void test_trace_record_max(void) { int ret; ret = run_trace("record", TRACECMD_OUT, "-p", "function", "-m", "5000", "sleep", "10", NULL); CU_TEST(ret == 0); ret = read_stats(TRACECMD_FILE, ".*bytes in size.*", "report", TRACECMD_IN, "--stat", NULL); CU_TEST(ret == 0); } static void test_trace_convert6(void) { struct stat st; int ret; /* If the trace data is already created, just use it, otherwise make it again */ if (stat(TRACECMD_FILE, &st) < 0) { ret = run_trace("record", TRACECMD_OUT, "-e", "sched", "sleep", "1", NULL); CU_TEST(ret == 0); } ret = grep_match("[ \t]6[ \t]*\\[Version\\]", "dump", TRACECMD_IN2, NULL); CU_TEST(ret == 0); } struct callback_data { long counter; struct trace_seq seq; }; static int read_events(struct tracecmd_input *handle, struct tep_record *record, int cpu, void *data) { struct tep_handle *tep = tracecmd_get_tep(handle); struct callback_data *cd = data; struct trace_seq *seq = &cd->seq; cd->counter++; trace_seq_reset(seq); tep_print_event(tep, seq, record, "%6.1000d", TEP_PRINT_TIME); trace_seq_printf(seq, " [%03d] ", cpu); tep_print_event(tep, seq, record, "%s-%d %s %s\n", TEP_PRINT_COMM, TEP_PRINT_PID, TEP_PRINT_NAME, TEP_PRINT_INFO); if (show_output) trace_seq_do_printf(seq); return 0; } static int read_events_10(struct 
tracecmd_input *handle, struct tep_record *record, int cpu, void *data) { struct callback_data *cd = data; read_events(handle, record, cpu, data); return cd->counter < 10 ? 0 : 1; } static void test_trace_library_read(void) { struct tracecmd_input *handle; struct callback_data data; struct stat st; int ret; data.counter = 0; trace_seq_init(&data.seq); /* If the trace data is already created, just use it, otherwise make it again */ if (stat(TRACECMD_FILE, &st) < 0) { ret = run_trace("record", TRACECMD_OUT, "-e", "sched", "sleep", "1", NULL); CU_TEST(ret == 0); } handle = tracecmd_open(TRACECMD_FILE, 0); CU_TEST(handle != NULL); ret = tracecmd_iterate_events(handle, NULL, 0, read_events, &data); CU_TEST(ret == 0); tracecmd_close(handle); CU_TEST(data.counter > 0); trace_seq_destroy(&data.seq); } static void test_trace_library_read_inc(void) { struct tracecmd_input *handle; struct callback_data data; struct stat st; long save_count; long total = 0; int ret; data.counter = 0; trace_seq_init(&data.seq); /* If the trace data is already created, just use it, otherwise make it again */ if (stat(TRACECMD_FILE, &st) < 0) { ret = run_trace("record", TRACECMD_OUT, "-e", "sched", "sleep", "1", NULL); CU_TEST(ret == 0); } /* First read all again */ handle = tracecmd_open(TRACECMD_FILE, 0); CU_TEST(handle != NULL); ret = tracecmd_iterate_events(handle, NULL, 0, read_events, &data); CU_TEST(ret == 0); CU_TEST(data.counter > 0); /* Save the counter */ save_count = data.counter; tracecmd_iterate_reset(handle); /* Read 10 at a time */ do { data.counter = 0; ret = tracecmd_iterate_events(handle, NULL, 0, read_events_10, &data); CU_TEST(ret >= 0); CU_TEST(data.counter <= 10); total += data.counter; } while (data.counter); CU_TEST(ret == 0); CU_TEST(total == save_count); trace_seq_destroy(&data.seq); tracecmd_close(handle); } static void test_trace_library_read_back(void) { struct tracecmd_input *handle; struct callback_data data; struct stat st; long save_count; int ret; data.counter = 
0; trace_seq_init(&data.seq); /* If the trace data is already created, just use it, otherwise make it again */ if (stat(TRACECMD_FILE, &st) < 0) { ret = run_trace("record", TRACECMD_OUT, "-e", "sched", "sleep", "1", NULL); CU_TEST(ret == 0); } /* First read all again */ handle = tracecmd_open(TRACECMD_FILE, 0); CU_TEST(handle != NULL); ret = tracecmd_iterate_events(handle, NULL, 0, read_events, &data); CU_TEST(ret == 0); CU_TEST(data.counter > 0); /* Save the counter */ save_count = data.counter; tracecmd_iterate_reset(handle); /* Read backwards */ data.counter = 0; ret = tracecmd_iterate_events_reverse(handle, NULL, 0, read_events, &data, false); CU_TEST(ret == 0); CU_TEST(data.counter == save_count); /* Read forward again */ data.counter = 0; ret = tracecmd_iterate_events(handle, NULL, 0, read_events, &data); CU_TEST(ret == 0); CU_TEST(data.counter == save_count); /* Read backwards from where we left off */ data.counter = 0; ret = tracecmd_iterate_events_reverse(handle, NULL, 0, read_events, &data, true); CU_TEST(ret == 0); CU_TEST(data.counter == save_count); trace_seq_destroy(&data.seq); tracecmd_close(handle); } static int test_suite_destroy(void) { unlink(TRACECMD_FILE); unlink(TRACECMD_FILE2); return 0; } static int test_suite_init(void) { struct stat st; const char *p; /* The test must be in the utest directory */ for (p = argv0 + strlen(argv0) - 1; p > argv0 && *p != '/'; p--) ; if (*p == '/') snprintf(tracecmd_exec, PATH_MAX, "%.*s/../tracecmd/trace-cmd", (int)(p - argv0), argv0); else strncpy(tracecmd_exec, "../tracecmd/trace-cmd", PATH_MAX); if (stat(tracecmd_exec, &st) < 0) { fprintf(stderr, "In tree trace-cmd executable not found\n"); return 1; } if (!(st.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH))) { fprintf(stderr, "In tree trace-cmd executable not executable\n"); return 1; } return 0; } void test_tracecmd_lib(void) { CU_pSuite suite = NULL; suite = CU_add_suite(TRACECMD_SUITE, test_suite_init, test_suite_destroy); if (suite == NULL) { fprintf(stderr, 
"Suite \"%s\" cannot be ceated\n", TRACECMD_SUITE); return; } CU_add_test(suite, "Simple record and report", test_trace_record_report); CU_add_test(suite, "Create a histogram", test_trace_sqlhist_hist); CU_add_test(suite, "Test convert from v7 to v6", test_trace_convert6); CU_add_test(suite, "Use libraries to read file", test_trace_library_read); CU_add_test(suite, "Use libraries to read file incremental", test_trace_library_read_inc); CU_add_test(suite, "Use libraries to read file backwards", test_trace_library_read_back); CU_add_test(suite, "Test max length", test_trace_record_max); }