pgbadger-3.3/0000755000175000017500000000000012140236301012416 5ustar darolddaroldpgbadger-3.3/Makefile.PL0000644000175000017500000000240712140236270014400 0ustar darolddarolduse ExtUtils::MakeMaker; # See lib/ExtUtils/MakeMaker.pm for details of how to influence # the contents of the Makefile that is written. use strict; my @ALLOWED_ARGS = ('INSTALLDIRS','DESTDIR'); # Parse command line arguments and store them as environment variables while ($_ = shift) { my ($k,$v) = split(/=/, $_, 2); if (grep(/^$k$/, @ALLOWED_ARGS)) { $ENV{$k} = $v; } } $ENV{DESTDIR} =~ s/\/$//; # Default install path my $DESTDIR = $ENV{DESTDIR} || ''; my $INSTALLDIRS = $ENV{INSTALLDIRS} || 'site'; WriteMakefile( 'DISTNAME' => 'pgbadger', 'NAME' => 'pgBadger', 'VERSION_FROM' => 'pgbadger', 'dist' => { 'COMPRESS'=>'gzip -9f', 'SUFFIX' => 'gz', 'ZIP'=>'/usr/bin/zip','ZIPFLAGS'=>'-rl' }, 'AUTHOR' => 'Gilles Darold (gilles@darold.net)', 'ABSTRACT' => 'pgBadger - PostgreSQL log analysis report', 'EXE_FILES' => [ qw(pgbadger) ], 'MAN1PODS' => { 'doc/pgBadger.pod' => 'blib/man1/pgbadger.1' }, 'DESTDIR' => $DESTDIR, 'INSTALLDIRS' => $INSTALLDIRS, 'clean' => {}, 'META_MERGE' => { resources => { homepage => 'http://projects.dalibo.org/pgbadger', repository => { type => 'git', git => 'git@github.com:dalibo/pgbadger.git', web => 'https://github.com/dalibo/pgbadger', }, }, } ); pgbadger-3.3/README0000644000175000017500000004142612140236270013312 0ustar darolddaroldNAME pgBadger - a fast PostgreSQL log analysis report SYNOPSIS pgbadger [options] logfile [...] PostgreSQL log analyzer with fully detailed reports and charts. Arguments: logfile can be a single log file, a list of files, or a shell command returning a list of files. If you want to pass log content from stdin use - as filename. Note that input from stdin will not work with csvlog. Options: -a | --average minutes : number of minutes to build the average graphs of queries and connections. -b | --begin datetime : start date/time for the data to be parsed in log. -c | --dbclient host : only report on entries for the given client host. -C | --nocomment : remove comments like /* ... */ from queries. -d | --dbname database : only report on entries for the given database. -e | --end datetime : end date/time for the data to be parsed in log. -f | --format logtype : possible values: syslog,stderr,csv. Default: stderr -G | --nograph : disable graphs on HTML output. Enable by default. -h | --help : show this message and exit. -i | --ident name : programname used as syslog ident. Default: postgres -j | --jobs number : number of jobs to run on parallel on each log file. Default is 1, run as single process. -J | --Jobs number : number of log file to parse in parallel. Default is 1, run as single process. -l | --last-parsed file: allow incremental log parsing by registering the last datetime and line parsed. Useful if you want to watch errors since last run or if you want one report per day with a log rotated each week. -m | --maxlength size : maximum length of a query, it will be restricted to the given size. Default: no truncate -n | --nohighlight : disable SQL code highlighting. -N | --appname name : only report on entries for given application name -o | --outfile filename: define the filename for output. Default depends on the output format: out.html, out.txt or out.tsung. To dump output to stdout use - as filename. -p | --prefix string : give here the value of your custom log_line_prefix defined in your postgresql.conf. 
Only use it if you aren't using one of the standard prefixes specified in the pgBadger documentation, such as if your prefix includes additional variables like client ip or application name. See examples below. -P | --no-prettify : disable SQL queries prettify formatter. -q | --quiet : don't print anything to stdout, even not a progress bar. -s | --sample number : number of query samples to store/display. Default: 3 -S | --select-only : use it if you want to report select queries only. -t | --top number : number of queries to store/display. Default: 20 -T | --title string : change title of the HTML page report. -u | --dbuser username : only report on entries for the given user. -U | --exclude-user username : exclude entries for the specified user from report. -v | --verbose : enable verbose or debug mode. Disabled by default. -V | --version : show pgBadger version and exit. -w | --watch-mode : only report errors just like logwatch could do. -x | --extension : output format. Values: text, html or tsung. Default: html -z | --zcat exec_path : set the full path to the zcat program. Use it if zcat or bzcat or unzip is not on your path. --pie-limit num : pie data lower than num% will show a sum instead. --exclude-query regex : any query matching the given regex will be excluded from the report. For example: "^(VACUUM|COMMIT)" You can use this option multiple times. --exclude-file filename: path of the file which contains all the regex to use to exclude queries from the report. One regex per line. --include-query regex : any query that does not match the given regex will be excluded from the report. For example: "(table_1|table_2)" You can use this option multiple times. --include-file filename: path of the file which contains all the regex of the queries to include from the report. One regex per line. --disable-error : do not generate error report. --disable-hourly : do not generate hourly report. --disable-type : do not generate query type report. --disable-query : do not generate query reports (slowest, most frequent, ...). --disable-session : do not generate session report. --disable-connection : do not generate connection report. --disable-lock : do not generate lock report. --disable-temporary : do not generate temporary report. --disable-checkpoint : do not generate checkpoint report. --disable-autovacuum : do not generate autovacuum report. 
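    The --exclude-file and --include-file options above simply take a text
    file with one regular expression per line. As a purely hypothetical
    illustration (these patterns are not shipped with pgBadger), an exclude
    file could look like this:

        ^(VACUUM|ANALYZE)
        ^COPY .* FROM stdin
        ^SET application_name

    Any query matching one of these patterns would then be left out of the
    report.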
Examples: pgbadger /var/log/postgresql.log pgbadger /var/log/postgres.log.2.gz /var/log/postgres.log.1.gz /var/log/postgres.log pgbadger /var/log/postgresql/postgresql-2012-05-* pgbadger --exclude-query="^(COPY|COMMIT)" /var/log/postgresql.log pgbadger -b "2012-06-25 10:56:11" -e "2012-06-25 10:59:11" /var/log/postgresql.log cat /var/log/postgres.log | pgbadger - # log prefix with stderr log output perl pgbadger --prefix '%t [%p]: [%l-1] user=%u,db=%d,client=%h' \ /pglog/postgresql-2012-08-21* perl pgbadger --prefix '%m %u@%d %p %r %a : ' /pglog/postgresql.log # Log line prefix with syslog log output perl pgbadger --prefix 'user=%u,db=%d,client=%h,appname=%a' \ /pglog/postgresql-2012-08-21* Use my 8 CPUs to parse my 10GB file faster, really faster perl pgbadger -j 8 /pglog/postgresql-9.1-main.log Generate Tsung sessions XML file with select queries only: perl pgbadger -S -o sessions.tsung --prefix '%t [%p]: [%l-1] user=%u,db=%d ' /pglog/postgresql-9.1.log Reporting errors every week by cron job: 30 23 * * 1 /usr/bin/pgbadger -q -w /var/log/postgresql.log -o /var/reports/pg_errors.html Generate report every week using incremental behavior: 0 4 * * 1 /usr/bin/pgbadger -q `find /var/log/ -mtime -7 -name "postgresql.log*"` \ -o /var/reports/pg_errors-`date +%F`.html -l /var/reports/pgbadger_incremental_file.dat This supposes that your log file and HTML report are also rotated every week. DESCRIPTION pgBadger is a PostgreSQL log analyzer built for speed with fully detailed reports from your PostgreSQL log file. It's a single and small Perl script that aims to replace and out-perform the old PHP script pgFouine. By the way, we would like to thank Guillaume Smet for all the work he has done on this really nice tool. We've been using it a long time, it is a really great tool! pgBadger is written in pure Perl language. It uses a Javascript library to draw graphs so that you don't need additional Perl modules or any other package to install. Furthermore, this library gives us additional features, such as zooming. pgBadger is able to autodetect your log file format (syslog, stderr or csvlog). It is designed to parse huge log files, as well as gzip, zip or bzip2 compressed files. See a complete list of features below. FEATURE pgBadger reports everything about your SQL queries: Overall statistics. The most frequent waiting queries. Queries that waited the most. Queries generating the most temporary files. Queries generating the largest temporary files. The slowest queries. Queries that took up the most time. The most frequent queries. The most frequent errors. The following reports are also available with hourly charts: Hourly queries statistics. Hourly temporary file statistics. Hourly checkpoints statistics. Hourly restartpoints statistics. Locks statistics. Queries by type (select/insert/update/delete). Distribution of queries type per database/application Sessions per database/user/client. Connections per database/user/client. Autovacuum and autoanalyze per table. All charts are zoomable and can be saved as PNG images. SQL queries reported are highlighted and beautified automatically. REQUIREMENT pgBadger comes as a single Perl script - you do not need anything other than a modern Perl distribution. Charts are rendered using a Javascript library so you don't need anything. Your browser will do all the work. If you planned to parse PostgreSQL CSV log files you might need some Perl Modules: Text::CSV_XS - to parse PostgreSQL CSV log files. 
    This module is optional; if you don't have PostgreSQL logs in CSV format
    you don't need to install it.

    The compression format of a log file is autodetected from the file
    extension. If pgBadger finds a gz extension it will use the zcat utility,
    with a bz2 extension it will use bzcat, and if the file extension is zip
    then the unzip utility will be used.

    If those utilities are not found in the PATH environment variable then
    use the --zcat command line option to change this path. For example:

        --zcat="/usr/local/bin/gunzip -c" or --zcat="/usr/local/bin/bzip2 -dc"
        --zcat="C:\tools\unzip -p"

    By default pgBadger will use the zcat, bzcat or unzip utility according
    to the file extension. If you rely on this default autodetection you can
    mix gz, bz2 and zip files. Specifying a custom value with the --zcat
    option disables this support for mixed compression formats.

    Note that multiprocessing cannot be used with compressed files or CSV
    files, nor on the Windows platform.

POSTGRESQL CONFIGURATION
    You must enable and set some configuration directives in your
    postgresql.conf before starting.

    You must first enable SQL query logging to have something to parse:

        log_min_duration_statement = 0

    Here every statement will be logged; on a busy server you may want to
    increase this value to log only queries with a longer duration. Note that
    if you have log_statement set to 'all', nothing will be logged through
    log_min_duration_statement. See the next chapter for more information.

    With the 'stderr' log format, log_line_prefix must be at least:

        log_line_prefix = '%t [%p]: [%l-1] '

    The log line prefix could add the user and database name as follows:

        log_line_prefix = '%t [%p]: [%l-1] user=%u,db=%d '

    or for the syslog log file format:

        log_line_prefix = 'user=%u,db=%d '

    The log line prefix for stderr output could also be:

        log_line_prefix = '%t [%p]: [%l-1] db=%d,user=%u '

    or for syslog output:

        log_line_prefix = 'db=%d,user=%u '

    You need to enable other parameters in postgresql.conf to get more
    information from your log files:

        log_checkpoints = on
        log_connections = on
        log_disconnections = on
        log_lock_waits = on
        log_temp_files = 0

    Do not enable log_statement, as its log format will not be parsed by
    pgBadger.

    Of course your log messages should be in English without locale support:

        lc_messages='C'

    but this is not only a pgBadger recommendation.

log_min_duration_statement, log_duration and log_statement
    If you want full statistics reports you must set
    log_min_duration_statement to 0 or more milliseconds.

    If you just want to report duration and number of queries and don't want
    all details about queries, set log_min_duration_statement to -1 to
    disable it and enable log_duration in your postgresql.conf file. If you
    want to add the most common query report you can either set
    log_min_duration_statement to a higher value or enable log_statement.

    Enabling log_min_duration_statement will add reports about the slowest
    queries and the queries that took up the most time. Take care that if you
    have log_statement set to 'all', nothing will be logged through
    log_min_duration_statement.

Parallel processing
    To enable parallel processing you just have to use the -j N option, where
    N is the number of cores you want to use.
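    The steps listed just below describe what pgbadger itself does. As a
    rough, hypothetical illustration of the chunking idea only (this sketch
    is not taken from pgbadger's code), a log file can be split into N byte
    ranges aligned on line boundaries like this:

        use strict;
        use warnings;

        # Hypothetical helper: split a file into N byte ranges, each range
        # starting at the beginning of a line.
        sub chunk_offsets
        {
            my ($logfile, $ncores) = @_;
            my $size = -s $logfile or die "empty or unreadable file: $logfile\n";
            my $step = int($size / $ncores) || 1;
            open(my $fh, '<', $logfile) or die "cannot open $logfile: $!\n";
            my @starts = (0);
            for my $n (1 .. $ncores - 1) {
                seek($fh, $n * $step, 0);   # jump near the theoretical boundary
                <$fh>;                      # then skip to the next line start
                push(@starts, tell($fh));
            }
            close($fh);
            my @ends = (@starts[1 .. $#starts], $size);
            return map { [ $starts[$_], $ends[$_] ] } 0 .. $#starts;
        }

        # Each byte range would then be handed to one forked worker process.
        my @chunks = chunk_offsets('/var/log/postgresql.log', 8);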
    pgbadger will then proceed as follows:

        for each log file
            chunk size = int(file size / N)
            look at start/end offsets of these chunks
            fork N processes and seek to the start offset of each chunk
            each process terminates when the parser reaches the end offset of its chunk
            each process writes its stats into a binary temporary file
        wait for all children to terminate

    All the binary temporary files generated are then read and loaded into
    memory to build the HTML output.

    With that method, at the start and end of chunks pgbadger may truncate or
    omit a maximum of N queries per log file, which is an insignificant gap
    if you have millions of queries in your log file. The chance that the
    query you are looking for is lost is close to zero, which is why this gap
    is acceptable. Most of the time such a query is counted twice, only
    truncated.

    When you have many small log files and many CPUs, it is faster to
    dedicate one core to one log file at a time. To enable this behavior use
    the -J N option instead. With 200 log files of 10MB each, the use of the
    -J option starts being really interesting with 8 cores. Using this method
    you can be sure not to lose any queries in the reports.

    Here is a benchmark done on a server with 8 CPUs and a single file of
    9.5GB:

        Option  |  1 CPU  | 2 CPU | 4 CPU | 8 CPU
        --------+---------+-------+-------+------
          -j    | 1h41m18 | 50m25 | 25m39 | 15m58
          -J    | 1h41m18 | 54m28 | 41m16 | 34m45

    With 200 log files of 10MB each and a total of 2GB, the results are
    slightly different:

        Option  | 1 CPU | 2 CPU | 4 CPU | 8 CPU
        --------+-------+-------+-------+------
          -j    | 20m15 |  9m56 |  5m20 |  4m20
          -J    | 20m15 |  9m49 |  5m00 |  2m40

    So it is recommended to use -j unless you have hundreds of small log
    files and can use at least 8 CPUs.

    IMPORTANT: when you are using parallel parsing, pgbadger will generate a
    lot of temporary files in the /tmp directory and will remove them at the
    end, so do not remove those files while pgbadger is still running. They
    are all named with the template tmp_pgbadgerXXXX.bin so they can be
    easily identified.

INSTALLATION
    Download the tarball from github and unpack the archive as follows:

        tar xzf pgbadger-3.x.tar.gz
        cd pgbadger-3.x/
        perl Makefile.PL
        make && sudo make install

    This will copy the Perl script pgbadger to /usr/local/bin/pgbadger by
    default and the man page into /usr/local/share/man/man1/pgbadger.1. Those
    are the default installation directories for a 'site' install.

    If you want to install everything under the /usr/ location, use
    INSTALLDIRS='perl' as an argument to Makefile.PL. The script will be
    installed into /usr/bin/pgbadger and the manpage into
    /usr/share/man/man1/pgbadger.1.

    For example, to install everything just like Debian does, proceed as
    follows:

        perl Makefile.PL INSTALLDIRS=vendor

    By default INSTALLDIRS is set to site.

AUTHORS
    pgBadger is an original work from Gilles Darold. It is maintained by the
    good folks at Dalibo and everyone who wants to contribute.

LICENSE
    pgBadger is free software distributed under the PostgreSQL Licence.

    Copyright (c) 2012-2013, Dalibo

    A modified version of the SQL::Beautify Perl Module is embedded in
    pgBadger with copyright (C) 2009 by Jonas Kramer and is published under
    the terms of the Artistic License 2.0.

pgbadger-3.3/.perltidyrc0000644000175000017500000000074212140236270014610 0ustar darolddarold--backup-and-modify-in-place --backup-file-extension=beforeTidy --block-brace-tightness=2 --brace-tightness=2 --closing-token-indentation=1 --continuation-indentation=4 --indent-columns=4 --maximum-line-length=134 --cuddled-else --opening-sub-brace-on-new-line --noopening-brace-on-new-line --nooutdent-labels --paren-tightness=2 --square-bracket-tightness=2 --vertical-tightness=0 --vertical-tightness-closing=0 --break-at-old-comma-breakpoints --entab-leading-whitespace=4 --tabs pgbadger-3.3/MANIFEST0000644000175000017500000000012112140236270013546 0ustar darolddaroldLICENSE Makefile.PL MANIFEST META.yml pgbadger README doc/pgBadger.pod ChangeLog pgbadger-3.3/LICENSE0000644000175000017500000000160712140236270013434 0ustar darolddaroldCopyright (c) 2012-2013, Dalibo Permission to use, copy, modify, and distribute this software and its documentation for any purpose, without fee, and without a written agreement is hereby granted, provided that the above copyright notice and this paragraph and the following two paragraphs appear in all copies. IN NO EVENT SHALL Dalibo BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF Dalibo HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. Dalibo SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND Dalibo HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. pgbadger-3.3/doc/0000755000175000017500000000000012140236270013170 5ustar darolddaroldpgbadger-3.3/doc/pgBadger.pod0000644000175000017500000003676612140236270015431 0ustar darolddarold=head1 NAME pgBadger - a fast PostgreSQL log analysis report =head1 SYNOPSIS pgbadger [options] logfile [...] PostgreSQL log analyzer with fully detailed reports and charts. Arguments: logfile can be a single log file, a list of files, or a shell command returning a list of files. If you want to pass log content from stdin use - as filename. Note that input from stdin will not work with csvlog. Options: -a | --average minutes : number of minutes to build the average graphs of queries and connections. -b | --begin datetime : start date/time for the data to be parsed in log. -c | --dbclient host : only report on entries for the given client host. -C | --nocomment : remove comments like /* ... */ from queries. -d | --dbname database : only report on entries for the given database. -e | --end datetime : end date/time for the data to be parsed in log. -f | --format logtype : possible values: syslog,stderr,csv. Default: stderr -G | --nograph : disable graphs on HTML output. Enable by default. -h | --help : show this message and exit. -i | --ident name : programname used as syslog ident. Default: postgres -j | --jobs number : number of jobs to run on parallel on each log file. Default is 1, run as single process. -J | --Jobs number : number of log file to parse in parallel. Default is 1, run as single process. -l | --last-parsed file: allow incremental log parsing by registering the last datetime and line parsed. Useful if you want to watch errors since last run or if you want one report per day with a log rotated each week. -m | --maxlength size : maximum length of a query, it will be restricted to the given size. 
Default: no truncate -n | --nohighlight : disable SQL code highlighting. -N | --appname name : only report on entries for given application name -o | --outfile filename: define the filename for output. Default depends on the output format: out.html, out.txt or out.tsung. To dump output to stdout use - as filename. -p | --prefix string : give here the value of your custom log_line_prefix defined in your postgresql.conf. Only use it if you aren't using one of the standard prefixes specified in the pgBadger documentation, such as if your prefix includes additional variables like client ip or application name. See examples below. -P | --no-prettify : disable SQL queries prettify formatter. -q | --quiet : don't print anything to stdout, even not a progress bar. -s | --sample number : number of query samples to store/display. Default: 3 -S | --select-only : use it if you want to report select queries only. -t | --top number : number of queries to store/display. Default: 20 -T | --title string : change title of the HTML page report. -u | --dbuser username : only report on entries for the given user. -U | --exclude-user username : exclude entries for the specified user from report. -v | --verbose : enable verbose or debug mode. Disabled by default. -V | --version : show pgBadger version and exit. -w | --watch-mode : only report errors just like logwatch could do. -x | --extension : output format. Values: text, html or tsung. Default: html -z | --zcat exec_path : set the full path to the zcat program. Use it if zcat or bzcat or unzip is not on your path. --pie-limit num : pie data lower than num% will show a sum instead. --exclude-query regex : any query matching the given regex will be excluded from the report. For example: "^(VACUUM|COMMIT)" You can use this option multiple times. --exclude-file filename: path of the file which contains all the regex to use to exclude queries from the report. One regex per line. --include-query regex : any query that does not match the given regex will be excluded from the report. For example: "(table_1|table_2)" You can use this option multiple times. --include-file filename: path of the file which contains all the regex of the queries to include from the report. One regex per line. --disable-error : do not generate error report. --disable-hourly : do not generate hourly report. --disable-type : do not generate query type report. --disable-query : do not generate query reports (slowest, most frequent, ...). --disable-session : do not generate session report. --disable-connection : do not generate connection report. --disable-lock : do not generate lock report. --disable-temporary : do not generate temporary report. --disable-checkpoint : do not generate checkpoint report. --disable-autovacuum : do not generate autovacuum report. 
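The --exclude-file and --include-file options above simply take a text file
with one regular expression per line. As a purely hypothetical illustration
(these patterns are not shipped with pgBadger), an exclude file could look
like this:

    ^(VACUUM|ANALYZE)
    ^COPY .* FROM stdin
    ^SET application_name

Any query matching one of these patterns would then be left out of the
report.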
Examples: pgbadger /var/log/postgresql.log pgbadger /var/log/postgres.log.2.gz /var/log/postgres.log.1.gz /var/log/postgres.log pgbadger /var/log/postgresql/postgresql-2012-05-* pgbadger --exclude-query="^(COPY|COMMIT)" /var/log/postgresql.log pgbadger -b "2012-06-25 10:56:11" -e "2012-06-25 10:59:11" /var/log/postgresql.log cat /var/log/postgres.log | pgbadger - # log prefix with stderr log output perl pgbadger --prefix '%t [%p]: [%l-1] user=%u,db=%d,client=%h' \ /pglog/postgresql-2012-08-21* perl pgbadger --prefix '%m %u@%d %p %r %a : ' /pglog/postgresql.log # Log line prefix with syslog log output perl pgbadger --prefix 'user=%u,db=%d,client=%h,appname=%a' \ /pglog/postgresql-2012-08-21* Use my 8 CPUs to parse my 10GB file faster, really faster perl pgbadger -j 8 /pglog/postgresql-9.1-main.log Generate Tsung sessions XML file with select queries only: perl pgbadger -S -o sessions.tsung --prefix '%t [%p]: [%l-1] user=%u,db=%d ' /pglog/postgresql-9.1.log Reporting errors every week by cron job: 30 23 * * 1 /usr/bin/pgbadger -q -w /var/log/postgresql.log -o /var/reports/pg_errors.html Generate report every week using incremental behavior: 0 4 * * 1 /usr/bin/pgbadger -q `find /var/log/ -mtime -7 -name "postgresql.log*"` \ -o /var/reports/pg_errors-`date +%F`.html -l /var/reports/pgbadger_incremental_file.dat This supposes that your log file and HTML report are also rotated every week. =head1 DESCRIPTION pgBadger is a PostgreSQL log analyzer built for speed with fully detailed reports from your PostgreSQL log file. It's a single and small Perl script that aims to replace and out-perform the old PHP script pgFouine. By the way, we would like to thank Guillaume Smet for all the work he has done on this really nice tool. We've been using it a long time, it is a really great tool! pgBadger is written in pure Perl language. It uses a Javascript library to draw graphs so that you don't need additional Perl modules or any other package to install. Furthermore, this library gives us additional features, such as zooming. pgBadger is able to autodetect your log file format (syslog, stderr or csvlog). It is designed to parse huge log files, as well as gzip, zip or bzip2 compressed files. See a complete list of features below. =head1 FEATURE pgBadger reports everything about your SQL queries: Overall statistics. The most frequent waiting queries. Queries that waited the most. Queries generating the most temporary files. Queries generating the largest temporary files. The slowest queries. Queries that took up the most time. The most frequent queries. The most frequent errors. The following reports are also available with hourly charts: Hourly queries statistics. Hourly temporary file statistics. Hourly checkpoints statistics. Hourly restartpoints statistics. Locks statistics. Queries by type (select/insert/update/delete). Distribution of queries type per database/application Sessions per database/user/client. Connections per database/user/client. Autovacuum and autoanalyze per table. All charts are zoomable and can be saved as PNG images. SQL queries reported are highlighted and beautified automatically. =head1 REQUIREMENT pgBadger comes as a single Perl script - you do not need anything other than a modern Perl distribution. Charts are rendered using a Javascript library so you don't need anything. Your browser will do all the work. If you planned to parse PostgreSQL CSV log files you might need some Perl Modules: Text::CSV_XS - to parse PostgreSQL CSV log files. 
This module is optional; if you don't have PostgreSQL logs in CSV format you
don't need to install it.

The compression format of a log file is autodetected from the file extension.
If pgBadger finds a gz extension it will use the zcat utility, with a bz2
extension it will use bzcat, and if the file extension is zip then the unzip
utility will be used.

If those utilities are not found in the PATH environment variable then use
the --zcat command line option to change this path. For example:

    --zcat="/usr/local/bin/gunzip -c" or --zcat="/usr/local/bin/bzip2 -dc"
    --zcat="C:\tools\unzip -p"

By default pgBadger will use the zcat, bzcat or unzip utility according to
the file extension. If you rely on this default autodetection you can mix gz,
bz2 and zip files. Specifying a custom value with the --zcat option disables
this support for mixed compression formats.

Note that multiprocessing cannot be used with compressed files or CSV files,
nor on the Windows platform.

=head1 POSTGRESQL CONFIGURATION

You must enable and set some configuration directives in your postgresql.conf
before starting.

You must first enable SQL query logging to have something to parse:

    log_min_duration_statement = 0

Here every statement will be logged; on a busy server you may want to
increase this value to log only queries with a longer duration. Note that if
you have log_statement set to 'all', nothing will be logged through
log_min_duration_statement. See the next chapter for more information.

With the 'stderr' log format, log_line_prefix must be at least:

    log_line_prefix = '%t [%p]: [%l-1] '

The log line prefix could add the user and database name as follows:

    log_line_prefix = '%t [%p]: [%l-1] user=%u,db=%d '

or for the syslog log file format:

    log_line_prefix = 'user=%u,db=%d '

The log line prefix for stderr output could also be:

    log_line_prefix = '%t [%p]: [%l-1] db=%d,user=%u '

or for syslog output:

    log_line_prefix = 'db=%d,user=%u '

You need to enable other parameters in postgresql.conf to get more
information from your log files:

    log_checkpoints = on
    log_connections = on
    log_disconnections = on
    log_lock_waits = on
    log_temp_files = 0

Do not enable log_statement, as its log format will not be parsed by
pgBadger.

Of course your log messages should be in English without locale support:

    lc_messages='C'

but this is not only a pgBadger recommendation.

=head1 log_min_duration_statement, log_duration and log_statement

If you want full statistics reports you must set log_min_duration_statement
to 0 or more milliseconds.

If you just want to report duration and number of queries and don't want all
details about queries, set log_min_duration_statement to -1 to disable it and
enable log_duration in your postgresql.conf file. If you want to add the most
common query report you can either set log_min_duration_statement to a higher
value or enable log_statement.

Enabling log_min_duration_statement will add reports about the slowest
queries and the queries that took up the most time. Take care that if you
have log_statement set to 'all', nothing will be logged through
log_min_duration_statement.

=head1 Parallel processing

To enable parallel processing you just have to use the -j N option, where N
is the number of cores you want to use.
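The steps listed just below describe what pgbadger itself does. As a rough,
hypothetical illustration of the chunking idea only (this sketch is not taken
from pgbadger's code), a log file can be split into N byte ranges aligned on
line boundaries like this:

    use strict;
    use warnings;

    # Hypothetical helper: split a file into N byte ranges, each range
    # starting at the beginning of a line.
    sub chunk_offsets
    {
        my ($logfile, $ncores) = @_;
        my $size = -s $logfile or die "empty or unreadable file: $logfile\n";
        my $step = int($size / $ncores) || 1;
        open(my $fh, '<', $logfile) or die "cannot open $logfile: $!\n";
        my @starts = (0);
        for my $n (1 .. $ncores - 1) {
            seek($fh, $n * $step, 0);   # jump near the theoretical boundary
            <$fh>;                      # then skip to the next line start
            push(@starts, tell($fh));
        }
        close($fh);
        my @ends = (@starts[1 .. $#starts], $size);
        return map { [ $starts[$_], $ends[$_] ] } 0 .. $#starts;
    }

    # Each byte range would then be handed to one forked worker process.
    my @chunks = chunk_offsets('/var/log/postgresql.log', 8);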
pgbadger will then proceed as follows:

    for each log file
        chunk size = int(file size / N)
        look at start/end offsets of these chunks
        fork N processes and seek to the start offset of each chunk
        each process terminates when the parser reaches the end offset of its chunk
        each process writes its stats into a binary temporary file
    wait for all children to terminate

All the binary temporary files generated are then read and loaded into memory
to build the HTML output.

With that method, at the start and end of chunks pgbadger may truncate or
omit a maximum of N queries per log file, which is an insignificant gap if
you have millions of queries in your log file. The chance that the query you
are looking for is lost is close to zero, which is why this gap is
acceptable. Most of the time such a query is counted twice, only truncated.

When you have many small log files and many CPUs, it is faster to dedicate
one core to one log file at a time. To enable this behavior use the -J N
option instead. With 200 log files of 10MB each, the use of the -J option
starts being really interesting with 8 cores. Using this method you can be
sure not to lose any queries in the reports.

Here is a benchmark done on a server with 8 CPUs and a single file of 9.5GB:

    Option  |  1 CPU  | 2 CPU | 4 CPU | 8 CPU
    --------+---------+-------+-------+------
      -j    | 1h41m18 | 50m25 | 25m39 | 15m58
      -J    | 1h41m18 | 54m28 | 41m16 | 34m45

With 200 log files of 10MB each and a total of 2GB, the results are slightly
different:

    Option  | 1 CPU | 2 CPU | 4 CPU | 8 CPU
    --------+-------+-------+-------+------
      -j    | 20m15 |  9m56 |  5m20 |  4m20
      -J    | 20m15 |  9m49 |  5m00 |  2m40

So it is recommended to use -j unless you have hundreds of small log files
and can use at least 8 CPUs.

IMPORTANT: when you are using parallel parsing, pgbadger will generate a lot
of temporary files in the /tmp directory and will remove them at the end, so
do not remove those files while pgbadger is still running. They are all named
with the template tmp_pgbadgerXXXX.bin so they can be easily identified.

=head1 INSTALLATION

Download the tarball from github and unpack the archive as follows:

    tar xzf pgbadger-3.x.tar.gz
    cd pgbadger-3.x/
    perl Makefile.PL
    make && sudo make install

This will copy the Perl script pgbadger to /usr/local/bin/pgbadger by default
and the man page into /usr/local/share/man/man1/pgbadger.1. Those are the
default installation directories for a 'site' install.

If you want to install everything under the /usr/ location, use
INSTALLDIRS='perl' as an argument to Makefile.PL. The script will be
installed into /usr/bin/pgbadger and the manpage into
/usr/share/man/man1/pgbadger.1.

For example, to install everything just like Debian does, proceed as follows:

    perl Makefile.PL INSTALLDIRS=vendor

By default INSTALLDIRS is set to site.

=head1 AUTHORS

pgBadger is an original work from Gilles Darold. It is maintained by the good
folks at Dalibo and everyone who wants to contribute.

=head1 LICENSE

pgBadger is free software distributed under the PostgreSQL Licence.

Copyright (c) 2012-2013, Dalibo

A modified version of the SQL::Beautify Perl Module is embedded in pgBadger
with copyright (C) 2009 by Jonas Kramer and is published under the terms of
the Artistic License 2.0.

pgbadger-3.3/.gitignore0000644000175000017500000000002312140236270014406 0ustar darolddarold
# Swap files
*.swp

pgbadger-3.3/CONTRIBUTING.md0000644000175000017500000000056012140236270014655 0ustar darolddarold
# How to contribute #

## Before Submitting an issue ##

1.
Upgrade to the latest version of pgBadger and see if the problem remains 2. Look at the [closed issues](https://github.com/dalibo/pgbadger/issues?state=closed), we may have alreayd answered to a similar problem 3. [Read the doc](http://dalibo.github.com/pgbadger/documentation.html). It is short and useful. pgbadger-3.3/ChangeLog0000644000175000017500000007411012140236270014200 0ustar darolddarold2013-05-01 - Version 3.3 This release adds four more useful reports about queries that generate locks and temporary files. An other new report about restartpoint on slaves and several bugs fix or cosmetic change. Support to parallel processing under Windows OS has been removed. - Remove parallel processing under Windows platform, the use of waitpid is freezing pgbadger. Thanks to Saurabh Agrawal for the report. I'm not comfortable with that OS this is why support have been removed, if someone know how to fix that, please submit a patch. - Fix Error in tempfile() under Windows. Thanks to Saurabh Agrawal for the report. - Fix wrong queries storage with lock and temporary file reports. Thanks to Thomas Reiss for the report. - Add samples queries to "Most frequent waiting queries" and "Queries generating the most temporary files" report. - Add two more reports about locks: 'Most frequent waiting queries (N)", and "Queries that waited the most". Thanks to Thomas Reiss for the patch. - Add two reports about temporary files: "Queries generating the most temporary files (N)" and "Queries generating the largest temporary files". Thanks to Thomas Reiss for the patch. - Cosmetic change to the Min/Max/Avg duration columns. - Fix report of samples error with csvlog format. Thanks to tpoindessous for the report. - Add --disable-autovacuum to the documentation. Thanks to tpoindessous for the report. - Fix unmatched ) in regex when using %s in prefix. - Fix bad average size of temporary file in Overall statistics report. Thanks to Jehan Guillaume de Rorthais for the report. - Add restartpoint reporting. Thanks to Guillaume Lelarge for the patch. - Made some minor change in CSS. - Replace %% in log line prefix internally by a single % so that it could be exactly the same than in log_line_prefix. Thanks to Cal Heldenbrand for the report. - Fix perl documentation header, thanks to Cyril Bouthors for the patch. 2013-04-07 - Version 3.2 This is mostly a bug fix release, it also adds escaping of HTML code inside queries and the adds Min/Max reports with Average duration in all queries reports. - In multiprocess mode, fix case where pgbadger does not update the last-parsed file and do not take care of the previous run. Thanks to Kong Man for the report. - Fix case where pgbadger does not update the last-parsed file. Thanks to Kong Man for the report. - Add CDATA to make validator happy. Thanks to Euler Taveira de Oliveira for the patch. - Some code review by Euler Taveira de Oliveira, thanks for the patch. - Fix case where stat were multiplied by N when -J was set to N. Thanks to thegnorf for the report. - Add a line in documentation about log_statement that disable log_min_duration_statement when it is set to all. - Add quick note on how to contribute, thanks to Damien Clochard for the patch. - Fix issue with logs read from stdin. Thanks to hubert depesz lubaczewski for the report. - Force pgbadger to not try to beautify queries bigger than 10kb, this will take too much time. This value can be reduce in the future if hang with long queries still happen. Thanks to John Rouillard for the report. 
- Fix an other issue in replacing bind param when the bind value is alone on a single line. Thanks to Kjeld Peters for the report. - Fix parsing of compressed files together with uncompressed files using the the -j option. Uncompressed files are now processed using split method and compressed ones are parsed per one dedicated process. - Replace zcat by gunzip -c to fix an issue on MacOsx. Thanks to Kjeld Peters for the report. - Escape HTML code inside queries. Thanks to denstark for the report. - Add Min/Max in addition to Average duration values in queries reports. Thanks to John Rouillard fot the feature request. - Fix top slowest array size with binary format. - Fix an other case with bind parameters with value in next line and the top N slowest queries that was repeated until N even if the real number of queries was lower. Thanks to Kjeld Peters for the reports. - Fix non replacement of bind parameters where there is line breaks in the parameters, aka multiline bind parameters. Thanks to Kjeld Peters for the report. - Fix error with seekable export tag with Perl v5.8. Thanks to Jeff Bohmer for the report. - Fix parsing of non standard syslog lines begining with a timestamp like "2013-02-28T10:35:11-05:00". Thanks to Ryan P. Kelly for the report. - Fix issue #65 where using -c | --dbclient with csvlog was broken. Thanks to Jaime Casanova for the report. - Fix empty report in watchlog mode (-w option). 2013-02-21 - Version 3.1 This is a quick release to fix missing reports of most frequent errors and slowest normalized queries in previous version published yesterday. - Fix empty report in watchlog mode (-w option). - Force immediat die on command line options error. - Fix missing report of most frequent events/errors report. Thanks to Vincent Laborie for the report. - Fix missing report of slowest normalized queries. Thanks to Vincent Laborie for the report. - Fix display of last print of progress bar when quiet mode is enabled. 2013-02-20 - Version 3.0 This new major release adds parallel log processing by using as many cores as wanted to parse log files, the performances gain is directly related to the number of cores specified. There's also new reports about autovacuum/autoanalyze informations and many bugs have been fixed. - Update documentation about log_duration, log_min_duration_statement and log_statement. - Rewrite dirty code around log timestamp comparison to find timestamp of the specified begin or ending date. - Remove distinction between logs with duration enabled from variables log_min_duration_statement and log_duration. Commands line options --enable-log_duration and --enable-log_min_duration have been removed. - Update documentation about parallel processing. - Remove usage of Storable::file_magic to autodetect binary format file, it is not include in core perl 5.8. Thanks to Marc Cousin for the report. - Force multiprocess per file when files are compressed. Thanks to Julien Rouhaud for the report. - Add progress bar logger for multiprocess by forking a dedicated process and using pipe. Also fix some bugs in using binary format that duplicate query/error samples per process. - chmod 755 pgbadger - Fix checkpoint reports when there is no checkpoint warnings. - Fix non report of hourly connections/checkpoint/autovacuum when not query is found in log file. Thanks to Guillaume Lelarge for the report. - Add better handling of signals in multiprocess mode. 
- Add -J|--job_per_file command line option to force pgbadger to use one process per file instead of using all to parse one file. Useful to have better performances with lot of small log file. - Fix parsing of orphan lines with stderr logs and log_line_prefix without session information into the prefix (%l). - Update documentation about -j | --jobs option. - Allow pgbadger to use several cores, aka multiprocessing. Add options -j | --jobs option to specify the number of core to use. - Add autovacuum and autoanalyze infos to binary format. - Fix case in SQL code highlighting where QQCODE temp keyword was not replaced. Thanks to Julien Ruhaud for the report. - Fix CSS to draw autovacuum graph and change legend opacity. - Add pie graph to show repartition of number of autovacuum per table and number of tuples removed by autovacuum per table. - Add debug information about selected type of log duration format. - Add report of tuples/pages removed in report of Vacuums by table. - Fix major bug on syslog parser where years part of the date was wrongly extracted from current date with logs generated in 2012. - Fix issue with Perl 5.16 that do not allow "ss" inside look-behind assertions. Thanks to Cedric for the report. - New vacuum and analyze hourly reports and graphs. Thanks to Guillaume Lelarge for the patch. UPGRADE: if you are running pgbadger by cron take care if you were using one of the following option: --enable-log_min_duration and --enable-log_duration, they have been removed and pgbadger will refuse to start. 2013-01-17 - Version 2.3 This release fixes several major issues especially with csvlog and a memory leak with log parsing using a start date. There's also several improvement like new reports of number of queries by database and application. Mouse over reported queries will show database, user, remote client and application name where they are executed. A new binary input/output format have been introduced to allow saving or reading precomputed statistics. This will allow incremental reports based on periodical runs of pgbader. This is a work in progress fully available with next coming major release. Several SQL code beautifier improvement from pgFormatter have also been merged. - Clarify misleading statement about log_duration: log_duration may be turned on depending on desired information. Only log_statement must not be on. Thanks to Matt Romaine for the patch. - Fix --dbname and --dbuser not working with csvlog format. Thanks to Luke Cyca for the report. - Fix issue in SQL formatting that prevent left back indentation when major keywords were found. Thanks to Kevin Brannen for the report. - Display 3 decimals in time report so that ms can be seen. Thanks to Adam Schroder for the request. - Force the parser to not insert a new line after the SET keyword when the query begin with it. This is to preserve the single line with queries like SET client_encoding TO "utf8"; - Add better SQL formatting of update queries by adding a new line after the SET keyword. Thanks to pilat66 for the report. - Update copyright and documentation. - Queries without application name are now stored under others application name. - Add report of number of queries by application if %a is specified in the log_line_prefix. - Add link menu to the request per database and limit the display of this information when there is more than one database. - Add report of requests per database. - Add report of user,remote client and application name to all request info. 
- Fix memory leak with option -b (--begin) and in incremental log parsing mode. - Remove duration part from log format auto-detection. Thanks to Guillaume Lelarge for the report. - Fix a performance issue on prettifying SQL queries that makes pgBagder several time slower that usual to generate the HTML output. Thanks to Vincent Laborie for the report. - Add missing SQL::Beautify paternity. - Add 'binary' format as input/output format. The binary output format allows to save log statistics in a non human readable file instead of an HTML or text file. These binary files might then be used as regular input files, combined or not, to produce a html or txt report. Thanks to Jehan Guillaume de Rorthais for the patch. - Remove port from the session regex pattern to match all lines. - Fix the progress bar. It was trying to use gunzip to get real file size for all formats (by default). Unbreak the bz2 format (that does not report real size) and add support for zip format. Thanks to Euler Taveira de Oliveira fort the patch. - Fix some typos and grammatical issues. Thanks to Euler Taveira de Oliveira fort the patch. - Improve SQL code highlighting and keywords detection merging change from pgFormatter project. - Add support to hostname or ip address in the client detection. Thanks to stuntmunkee for the report. - pgbadger will now only reports execute statement of the extended protocol (parse/bind/execute). Thanks to pierrestroh for the report. - Fix numerous typos as well as formatting and grammatical issues. Thanks to Thom Brown for the patch. - Add backward compatibility to obsolete --client command line option. If you were using the short option -c nothing is changed. - Fix issue with --dbclient and %h in log_line_prefix. Thanks to Julien Rouhaud for the patch. - Fix multiline progress bar output. - Allow usage of a dash into database, user and application names when prefix is used. Thanks to Vipul for the report. - Mouse over queries will now show in which database they are executed in the overviews (Slowest queries, Most frequent queries, etc. ). Thank to Dirk-Jan Bulsink for the feature request. - Fix missing keys on %cur_info hash. Thanks to Marc Cousin for the report. - Move opening file handle to log file into a dedicated function. Thanks to Marc Cousin for the patch. - Replace Ctrl+M by printable \r. Thanks to Marc Cousin for the report. 2012-11-13 - Version 2.2 This release add some major features like tsung output, speed improvement with csvlog, report of shut down events, new command line options to generate report excluding some user(s), to build report based on select queries only, to specify regex of the queries that must only be included in the report and to remove comments from queries. Lot of bug fixes, please upgrade. - Update PostgreSQL keywords list for 9.2 - Fix number of queries in progress bar with tsung output. - Remove obsolete syslog-ng and temporary syslog-ll log format added to fix some syslog autodetection issues. There is now just one syslog format: syslog, differences between syslog formats are detected and the log parser is adaptive. - Add comment about the check_incremental_position() method - Fix reports with empty graphs when log files were not in chronological order. - Add report of current total of queries and events parsed in progress bar. Thanks to Jehan-Guillaume de Rorthais for the patch. - Force pgBadger to use an require the XS version of Text::CSV instead of the Pure Perl implementation. It is a good bit faster thanks to David Fetter for the patch. 
Note that using csvlog is still a bit slower than syslog or stderr log format. - Fix several issue with tsung output. - Add report of shut down events - Add debug information on command line used to pipe compressed log file when -v is provide. - Add -U | --exclude-user command line option to generate report excluded user. Thanks to Birta Levente for the feature request. - Allow some options to be specified multiple time or be written as a coma separated list of value, here are these options: --dbname, --dbuser, --dbclient, --dbappname, --exclude_user. - Add -S | --select-only option to build report only on select queries. - Add first support to tsung output, see usage. Thanks to Guillaume Lelarge for the feature request. - Add --include-query and --include-file to specify regex of the queries that must only be included in the report. Thanks to Marc Cousin for the feature request. - Fix auto detection of log_duration and log_min_duration_statement format. - Fix parser issue with Windows logs without timezone information. Thanks to Nicolas Thauvin for the report. - Fix bug in %r = remote host and port log line prefix detection. Thanks to Hubert Depesz Lubaczewski for the report. - Add -C | --nocomment option to remove comment like /* ... */ from queries. Thanks to Hubert Depesz Lubaczewski for the feature request. - Fix escaping of log_line_prefix. Thanks to Hubert Depesz Lubaczewski for the patch. - Fix wrong detection of update queries when a query has a object names containing update and set. Thanks to Vincent Laborie for the report. 2012-10-10 - Version 2.1 This release add a major feature by allowing any custom log_line_prefix to be used by pgBadger. With stderr output you at least need to log the timestamp (%t) the pid (%p) and the session/line number (%l). Support to log_duration instead of log_min_duration_statement to allow reports simply based on duration and count report without query detail and report. Lot of bug fixes, please upgrade asap. - Add new --enable-log_min_duration option to force pgbadger to use lines generated by the log_min_duration_statement even if the log_duration format is autodetected. Useful if you use both but do not log all queries. Thanks to Vincent Laborie for the feature request. - Add syslog-ng format to better handle syslog traces with notation like: [ID * local2.info]. It is autodetected but can be forced in the -f option with value set to: syslog-ng. - Add --enable-log_duration command line option to force pgbadger to only use the log_duration trace even if log_min_duration_statement traces are autodetected. - Fix display of empty hourly graph when no data were found. - Remove query type report when log_duration is enabled. - Fix a major bug in query with bind parameter. Thanks to Marc Cousin for the report. - Fix detection of compressed log files and allow automatic detection and uncompress of .gz, .bz2 and .zip files. - Add gunzip -l command to find the real size of a gzip compressed file. - Fix log_duration only reports to not take care about query detail but just count and duration. - Fix issue with compressed csvlog. Thanks to Philip Freeman for the report. - Allow usage of log_duration instead of log_min_duration_statement to just collect statistics about the number of queries and their time. Thanks to Vincent Laborie for the feature request. - Fix issue on syslog format and autodetect with additional info like: [ID * local2.info]. Thanks to kapsalar for the report. - Removed unrecognized log line generated by deadlock_timeout. 
- Add missing information about unsupported csv log input from stdin. It must be read from a file. Thank to Philip Freeman for the report. - Fix issue #28: Illegal division by zero with log file without query and txt output. Thanks to rlowe for the report. - Update documentation about the -N | --appname option. - Rename --name option into --appname. Thanks to Guillaume Lellarge for the patch. - Fix min/max value in xasis that was always represented 2 days by default. Thanks to Casey Allen Shobe for the report. - Fix major bug when running pgbadger with the -e option. Thanks to Casey Allen Shobe for the report and the great help - Change project url to http://dalibo.github.com/pgbadger/. Thanks to Damien Clochard for this new hosting. - Fix lot of issues in CSV parser and force locale to be C. Thanks to Casey Allen Shobe for the reports. - Improve speed with custom log_line_prefix. - Merge pull request #26 from elementalvoid/helpdoc-fix - Fixed help text for --exclude-file. Old help text indicated that the option name was --exclude_file which was incorrect. - Remove the obsolete --regex-user and --regex-db options that was used to specify a search pattern in the log_line_prefix to find the user and db name. This is replaced by the --prefix option. - Replace Time column report header by Hour. - Fix another issue in log_line_prefix parser with stderr format - Add a more complex example using log_line_prefix - Fix log_line_prefix issue when using timepstamp with millisecond. - Add support to use any custom log_line_prefix with new option -p or --prefix. See README for an example. - Fix false autodetection of CSV format when log_statement is enable or in possible other cases. This was resulting in error: "FATAL: cannot use CSV". Thanks to Thomas Reiss for the report. - Fix display of empty graph of connections per seconds - Allow character : in log line prefix, it will no more break the log parsing. Thanks to John Rouillard for the report. - Add report of configuration parameter changes into the errors report and change errors report by events report to handle important messages that are not errors. - Allow pgbadger to recognize " autovacuum launcher" messages. 2012-08-21 - version 2.0 This major version adds some changes not backward compatible with previous versions. Options -p and -g are not more used as progress bar and graphs generation are enabled by default now. The obsolete -l option use to specify the log file to parse has been reused to specify an incremental file. Outside these changes and some bug fix there's also new features: * Using an incremental file with -l option allow to parse multiple time a single log file and to "seek" at the last line parsed during the previous run. Useful if you have a log rotation not sync with your pgbadger run. For exemple you can run somthing like this: pgbadger `find /var/log/postgresql/ -name "postgresql*" -mtime -7 -type f` \ -o report_`date +%F`.html -l /var/run/pgbadger/last_run.log * All queries diplayed in the HTML report are now clickable to display or hide a nice SQL query format. This is called SQL format beautifier. * CSV log parser have been entirely rewritten to handle csv with multiline. Every one should upgrade. - Change license from BSD like to PostgreSQL license. Request from Robert Treat. - Fix wrong pointer on Connections per host menu. Reported by Jean-Paul Argudo. - Small fix for sql formatting adding scrollbars. Patch by Julien Rouhaud. - Add SQL format beautifier on SQL queries. 
When you will click on a query it will be beautified. Patch by Gilles Darold - The progress bar is now enabled by default, the -p option has been removed. Use -q | --quiet to disable it. Patch by Gilles Darold. - Graphs are now generated by default for HTML output, option -g as been remove and option -G added to allow disabling graph generation. Request from Julien Rouhaud, patch by Gilles Darold. - Remove option -g and -p to the documentation. Patch by Gilles Darold. - Fix case sensitivity in command line options. Patch by Julien Rouhaud. - Add -T|--title option to change report title. Patch by Yury Bushmelev. - Add new option --exclude-file to exclude specific commands with regex stated in a file. This is a rewrite by Gilles Darold of the neoeahit (Vipul) patch. - CSV log parser have been entirely rewritten to handle csv with multi line, it also adds approximative duration for csvlog. Reported by Ludhimila Kendrick, patch by Gilles Darold. - Alphabetical reordering of options list in method usage() and documentation. Patch by Gilles Darold. - Remove obsolete -l | --logfile command line option, the -l option will be reused to specify an incremental file. Patch by Gilles Darold. - Add -l | --last-parsed options to allow incremental run of pgbadger. Patch by Gilles Darold. - Replace call to timelocal_nocheck by timegm_nocheck, to convert date time into second from the epoch. This should fix timezone issue. Patch by Gilles Darold. - Change regex on log parser to allow missing ending space in log_line_prefix. This seems a common mistake. Patch by Gilles Darold. - print warning when an empty log file is found. Patch by Gilles Darold. - Add perltidy rc file to format pgbadger Perl code. Patch from depesz. 2012-07-15 - version 1.2 This version adds some reports and fixes a major issue in log parser. Every one should upgrade. - Rewrite this changelog to be human readable. - Add -v | --verbose to enable debug mode. It is now disable by default - Add hourly report of checkpoint warning when checkpoints are occuring too frequently, it will display the hourly count and the average occuring time. - Add new report that sums the messages by log types. The report shows the number of messages of each log type, and a percentage. It also displays a pie graph. Patch by Guillaume Lelarge. - Add missing pie graph on locks by type report. - Format pie mouse track to display values only. - Fix graph download button id on new connection graph. - Add trackFormatter to flotr2 line graphs to show current x/y values. - Fix issue on per minute minimum value. - Add a note about Windows Os and zcat as well as a more general note about using compressed log file in other format than gzip. - Complete rewrite of the log parser to handle unordered log lines. Data are now stored by pid before and added to the global statistics at end. Error report now include full details, statements, contexts and hints when available. Deadlock are also fully reported with the concerned queries. - Fix miss handling of multi lines queries on syslog. - Add -a|--average option to configure the per minutes average interval for queries and connexions. If you want the average to be calculated each minutes instead of the 5 per default, use --average 1 or for the default --average 5. If you want average per hour set it to 60. - Add hourly statistics of connections and sessions as well as a chart about the number of connection per second (5 minutes average). - Allow OTHERS type of queries lower than 2% to be include in the sum of types < 2%. 
2012-07-15 - version 1.2

This version adds some reports and fixes a major issue in the log parser. Everyone should upgrade.

- Rewrite this changelog to be human readable.
- Add -v | --verbose to enable debug mode. It is now disabled by default.
- Add hourly report of checkpoint warnings when checkpoints are occurring too frequently; it will display the hourly count and the average occurrence time.
- Add new report that sums the messages by log type. The report shows the number of messages of each log type, and a percentage. It also displays a pie graph. Patch by Guillaume Lelarge.
- Add missing pie graph on the locks by type report.
- Format pie mouse track to display values only.
- Fix graph download button id on the new connection graph.
- Add trackFormatter to flotr2 line graphs to show current x/y values.
- Fix issue on per-minute minimum value.
- Add a note about Windows OS and zcat as well as a more general note about using compressed log files in formats other than gzip.
- Complete rewrite of the log parser to handle unordered log lines. Data are now stored by pid first and added to the global statistics at the end. The error report now includes full details, statements, contexts and hints when available. Deadlocks are also fully reported with the queries concerned.
- Fix mishandling of multiline queries on syslog.
- Add -a|--average option to configure the per-minute average interval for queries and connections. If you want the average to be calculated each minute instead of the default 5, use --average 1, or for the default --average 5. If you want an average per hour set it to 60 (see the example below).
- Add hourly statistics of connections and sessions as well as a chart about the number of connections per second (5 minutes average).
- Allow OTHERS type of queries lower than 2% to be included in the sum of types < 2%.
- Add autodetection of the syslog ident name if it differs from the default "postgres" and there is just one ident name in the log.
- Remove syslog replacement of tabulation by #011 still visible when there were multiple tabulations.
- Fix autodetection of syslog log format with single-digit day number in the date.
- Add ChangeLog to MANIFEST and change URI in html footer.
- Check pgBadger compatibility with Windows OSes. Runs perfectly.
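The -a | --average option described above can be used, for example, like this (the log file path is only a placeholder):

    # compute per-minute averages instead of the default 5-minute interval
    pgbadger --average 1 /var/log/postgresql/postgresql.log

    # compute hourly averages
    pgbadger -a 60 /var/log/postgresql/postgresql.log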
2012-07-04 - version 1.1

This release fixes a lot of issues and adds several main features.

New features:
- Add possibility to get the log from stdin.
- Change syslog parsing regex to allow a log timestamp in log_line_prefix, very often forgotten when the log destination is changed from stderr to syslog.
- Add documentation for the -z | --zcat command line option.
- Allow `zcat` location to be specified via `--zcat` - David E. Wheeler
- Add --disable-session, --disable-connection and --disable-checkpoint command line options to remove their respective reports from the output.
- Add --disable-query command line option to remove query statistics from the output.
- Add --disable-hourly command line option to remove hourly statistics from the output.
- Add --disable-error command line option to remove the error report from the output.
- Add --exclude-query option to exclude types of queries by specifying a regex.
- Set thousand separator and decimal separator to be locale dependent.
- Add -w option to only report errors.
- Add Makefile.PL and full POD documentation to the project.
- Allow multiple log files from the command line.
- Add simple csvlog support - Alex Hunsaker
- Hourly reports for temporary files and checkpoints have moved to a separate table.
- Add hourly connections and sessions statistics.
- Add a chart about the number of connections per second.

Bug fixes:
- Add information about log format requirement (lc_messages = 'C'). Reported by Alain Benard.
- Fix for begin/end dates with single-digit day using syslog. Patch by Joseph Marlin.
- Fix handling of syslog dates with single-digit day number. Patch by Denis Orlikhin.
- Fix many English syntax errors in error messages and documentation. Patch by Joseph Marlin.
- Fix non-terminated TH html tag in checkpoint hourly table. Reported by Joseph Marlin.
- The "Log file" section will now only report the first and last log file parsed.
- Fix empty output in hourly temporary file stats.
- Fix wrapping query that goes out of the table and makes the window scroll horizontally. Asked by Isaac Reuben.
- Fix code where != was replaced by $$CLASSSY0A$$!=$$CLASSSY0B$$ in the output. Reported by Isaac Reuben.
- Fix and review text report output.
- Fix an issue in SQL code highlight replacement.
- Complete review of the HTML output.
- Add .gitignore for swap files. Patch by Vincent Picavet.
- Fix wrong variable for user and database filter. Patch by Vincent Picavet.
- Change default regexp for user and db to be able to detect both. Patch by Vincent Picavet.
- Fix false cur_date when using syslog and allow -b and -e options to work. Patch by Vincent Picavet.
- Fix some cases where logs were not detected as PostgreSQL log lines.
- Added explanation for --begin and --end datetime settings. Patch by ragged.
- Added -v / --version. Patch by ragged.
- Fix usage information and presentation in README file.

2012-05-04 - version 1.0

First public release of pgBadger.

New features:
- Add graph of checkpoint WAL files usage (added, removed, recycled).
- Add --image-format to allow changing the default png image format to jpeg.
- Allow download of all pie graphics as images.
- Add --pie-limit to sum all data lower than this percentage limit to avoid label overlap.
- Allow download of graphics as PNG images.
- Replace GD::Graph by the Flotr2 javascript library to draw graphics. Patch by Guillaume Lelarge.
- Add pie graphs for session, database, user and host. Add a --quiet option to remove debug output and --progress to show a progress bar during log parsing.
- Add pie graph for Queries by type.
- Add graph for checkpoint write buffers per hour.
- Allow log parsing without any log_line_prefix and extend it to be defined by the user. A custom log_line_prefix can be parsed using user-defined regexes with the command line options --regex-db and --regex-user. For example, the default regex of pgbadger to parse user and db name from log_line_prefix can be written like this:

    pgbadger -l mylogfile.log --regex-user="user=([^,]*)," \
        --regex-db="db=([^,]*)"

- Separate log_line_prefix from the log level part in the parser to extend log_line_prefix parsing.
- If there is just one argument, assume it is the logfile and use default values for all other parameters.
- Add autodetection of log format (syslog or stderr) if none is given with option -f.
- Add --outfile option to dump output to a file instead of stdout. Default filename is out.html or out.txt following the output format. To dump to stdout set the filename to -
- Add --version command line option to show the current pgbadger version.

Bug fixes:
- Rearrange x and y axis.
- Fix legend opacity on graphics.
- Rearrange Overall stats view.
- Add more "normalization" of error messages.
- Fix samples error with normalized error instead of the real error message.
- Fix another decimal limit issue with the average size of temporary files.
- Force quiet mode when --progress is used.
- Fix per-session graphs.
- Fix sort order of days/hours into hours array.
- Fix sort order of days into graphics.
- Remove display of locks, sessions and connections statistics when none are available.
- Fix display of empty checkpoint column when no checkpoint was found in the log file.
pgbadger-3.3/META.yml0000644000175000017500000000051612140236270013676 0ustar darolddarold# http://module-build.sourceforge.net/META-spec.html #XXXXXXX This is a prototype!!! It will change in the future!!! XXXXX# name: pgBadger version: 1.1 version_from: pgbadger installdirs: site recommends: Text::CSV_XS: 0 distribution_type: script generated_by: ExtUtils::MakeMaker version 6.17 pgbadger-3.3/pgbadger0000755000175000017500000124044112140236270014132 0ustar darolddarold#!/usr/bin/perl #------------------------------------------------------------------------------ # # pgBadger - Advanced PostgreSQL log analyzer # # This program is open source, licensed under the PostgreSQL Licence. # For license terms, see the LICENSE file.
#------------------------------------------------------------------------------ # # Settings in postgresql.conf # # You should enable SQL query logging with log_min_duration_statement >= 0 # With stderr output # Log line prefix should be: log_line_prefix = '%t [%p]: [%l-1] ' # Log line prefix should be: log_line_prefix = '%t [%p]: [%l-1] user=%u,db=%d ' # Log line prefix should be: log_line_prefix = '%t [%p]: [%l-1] db=%d,user=%u ' # With syslog output # Log line prefix should be: log_line_prefix = 'db=%d,user=%u ' # # Additional information that could be collected and reported # log_checkpoints = on # log_connections = on # log_disconnections = on # log_lock_waits = on # log_temp_files = 0 # log_autovacuum_min_duration = 0 #------------------------------------------------------------------------------ use vars qw($VERSION); use strict qw(vars subs); use Getopt::Long qw(:config no_ignore_case bundling); use IO::File; use Benchmark; use File::Basename; use Storable qw(store_fd fd_retrieve); use Time::Local 'timegm_nocheck'; use POSIX qw(locale_h sys_wait_h _exit); setlocale(LC_NUMERIC, ''); setlocale(LC_ALL, 'C'); use File::Spec qw/ tmpdir /; use File::Temp qw/ tempfile /; use IO::Handle; use IO::Pipe; use Time::HiRes qw/usleep/; $VERSION = '3.3'; $SIG{'CHLD'} = 'DEFAULT'; my $TMP_DIR = File::Spec->tmpdir() || '/tmp'; my %RUNNING_PIDS = (); my @tempfiles = (); my $parent_pid = $$; my $interrupt = 0; my $tmp_last_parsed = ''; #### # method used to fork as many child as wanted ## sub spawn { my $coderef = shift; unless (@_ == 0 && $coderef && ref($coderef) eq 'CODE') { print "usage: spawn CODEREF"; exit 0; } my $pid; if (!defined($pid = fork)) { print STDERR "Error: cannot fork: $!\n"; return; } elsif ($pid) { $RUNNING_PIDS{$pid} = $pid; return; # the parent } # the child -- go spawn $< = $>; $( = $); # suid progs only exit &$coderef(); } # Informa the parent that it should stop iterate on parsing other files sub stop_parsing { $interrupt = 1; } # With multiprocess we need to wait all childs sub wait_child { my $sig = shift; print STDERR "Received terminating signal ($sig).\n"; if ($^O !~ /MSWin32|dos/i) { 1 while wait != -1; $SIG{INT} = \&wait_child; $SIG{TERM} = \&wait_child; foreach my $f (@tempfiles) { unlink("$f->[1]") if (-e "$f->[1]"); } unlink("$tmp_last_parsed") if ($tmp_last_parsed); } _exit(0); } $SIG{INT} = \&wait_child; $SIG{TERM} = \&wait_child; $SIG{USR2} = \&stop_parsing; $| = 1; # Command line options my $zcat_cmd = 'gunzip -c'; my $zcat = $zcat_cmd; my $bzcat = 'bunzip2 -c'; my $ucat = 'unzip -p'; my $gzip_uncompress_size = "gunzip -l %f | grep -E '^\\s*[0-9]+' | awk '{print \$2}'"; my $zip_uncompress_size = "unzip -l %f | awk '{if (NR==4) print \$1}'"; my $format = ''; my $outfile = ''; my $outdir = ''; my $help = ''; my $ver = ''; my @dbname = (); my @dbuser = (); my @dbclient = (); my @dbclient2 = (); my @dbappname = (); my @exclude_user = (); my $ident = ''; my $top = 0; my $sample = 0; my $extension = ''; my $maxlength = 0; my $graph = 1; my $nograph = 0; my $debug = 0; my $nohighlight = 0; my $noprettify = 0; my $from = ''; my $to = ''; my $quiet = 0; my $progress = 1; my $error_only = 0; my @exclude_query = (); my $exclude_file = ''; my @include_query = (); my $include_file = ''; my $disable_error = 0; my $disable_hourly = 0; my $disable_type = 0; my $disable_query = 0; my $disable_session = 0; my $disable_connection = 0; my $disable_lock = 0; my $disable_temporary = 0; my $disable_checkpoint = 0; my $disable_autovacuum = 0; my $avg_minutes = 5; my $last_parsed = ''; 
my $report_title = 'pgBadger: PostgreSQL log analyzer'; my $log_line_prefix = ''; my $compiled_prefix = ''; my $project_url = 'http://dalibo.github.com/pgbadger/'; my $t_min = 0; my $t_max = 0; my $t_min_hour = 0; my $t_max_hour = 0; my $remove_comment = 0; my $select_only = 0; my $tsung_queries = 0; my $queue_size = 0; my $job_per_file = 0; my $NUMPROGRESS = 10000; my @DIMENSIONS = (800, 300); my $RESRC_URL = ''; my $img_format = 'png'; my @log_files = (); my %prefix_vars = (); my $sql_prettified; # Do not display data in pie where percentage is lower than this value # to avoid label overlapping. my $pie_percentage_limit = 2; # Get the decimal separator my $n = 5 / 2; my $num_sep = ','; $num_sep = ' ' if ($n =~ /,/); # get the command line parameters my $result = GetOptions( "a|average=i" => \$avg_minutes, "b|begin=s" => \$from, "c|dbclient=s" => \@dbclient, "C|nocomment!" => \$remove_comment, "d|dbname=s" => \@dbname, "e|end=s" => \$to, "f|format=s" => \$format, "G|nograph!" => \$nograph, "h|help!" => \$help, "i|ident=s" => \$ident, "j|jobs=i" => \$queue_size, "J|job_per_file=i" => \$job_per_file, "l|last-parsed=s" => \$last_parsed, "m|maxlength=i" => \$maxlength, "N|appname=s" => \@dbappname, "n|nohighlight!" => \$nohighlight, "o|outfile=s" => \$outfile, "p|prefix=s" => \$log_line_prefix, "P|no-prettify!" => \$noprettify, "q|quiet!" => \$quiet, "s|sample=i" => \$sample, "S|select-only!" => \$select_only, "t|top=i" => \$top, "T|title=s" => \$report_title, "u|dbuser=s" => \@dbuser, "U|exclude-user=s" => \@exclude_user, "v|verbose!" => \$debug, "V|version!" => \$ver, "w|watch-mode!" => \$error_only, "x|extension=s" => \$extension, "z|zcat=s" => \$zcat, "pie-limit=i" => \$pie_percentage_limit, "image-format=s" => \$img_format, "exclude-query=s" => \@exclude_query, "exclude-file=s" => \$exclude_file, "include-query=s" => \@include_query, "include-file=s" => \$include_file, "disable-error!" => \$disable_error, "disable-hourly!" => \$disable_hourly, "disable-type!" => \$disable_type, "disable-query!" => \$disable_query, "disable-session!" => \$disable_session, "disable-connection!" => \$disable_connection, "disable-lock!" => \$disable_lock, "disable-temporary!" => \$disable_temporary, "disable-checkpoint!" => \$disable_checkpoint, "disable-autovacuum!" 
=> \$disable_autovacuum, "client=s" => \@dbclient2, # Backward compatibility ); die "FATAL: use pgbadger --help\n" if (not $result); push(@dbclient, @dbclient2); # Backward compatibility if ($ver) { print "pgBadger version $VERSION\n"; exit 0; } &usage() if ($help); # Rewrite some command line argument as lists &compute_arg_list(); # Log file to be parsed are passed as command line argument if ($#ARGV >= 0) { foreach my $file (@ARGV) { if ($file ne '-') { die "FATAL: logfile $file must exist!\n" if not -f $file; if (-z $file) { print "WARNING: file $file is empty\n"; next; } } push(@log_files, $file); } } # Logfile is a mandatory parameter if ($#log_files < 0) { print STDERR "FATAL: you must give a log file as command line parameter.\n\n"; &usage(); } # Quiet mode is forced with progress bar $progress = 0 if ($quiet); # Set the default number minutes for queries and connections average $avg_minutes ||= 5; $avg_minutes = 60 if ($avg_minutes > 60); $avg_minutes = 1 if ($avg_minutes < 1); # Set syslog prefix regex my $other_syslog_line = qr/^(...)\s+(\d+)\s(\d+):(\d+):(\d+)(?:\s[^\s]+)?\s([^\s]+)\s([^\s\[]+)\[(\d+)\]:(?:\s\[[^\]]+\])?\s\[(\d+)\-\d+\]\s*(.*)/; my $orphan_syslog_line = qr/^(...)\s+(\d+)\s(\d+):(\d+):(\d+)(?:\s[^\s]+)?\s([^\s]+)\s([^\s\[]+)\[(\d+)\]:/; my $orphan_stderr_line = ''; # Set default format $format ||= &autodetect_format($log_files[0]); if ($format eq 'syslog2') { $other_syslog_line = qr/^(\d+)-(\d+)-(\d+)T(\d+):(\d+):(\d+)(?:.[^\s]+)?\s([^\s]+)\s([^\s\[]+)\[(\d+)\]:(?:\s\[[^\]]+\])?\s\[(\d+)\-\d+\]\s*(.*)/; $orphan_syslog_line = qr/^(\d+)-(\d+)-(\d+)T(\d+):(\d+):(\d+)(?:.[^\s]+)?\s([^\s]+)\s([^\s\[]+)\[(\d+)\]:/; } # Set default top query $top ||= 20; # Set the default number of samples $sample ||= 3; # Set the default extension and output format if (!$extension) { if ($outfile =~ /\.bin/i) { $extension = 'binary'; } elsif ($outfile =~ /\.tsung/i) { $extension = 'tsung'; } elsif ($outfile =~ /\.htm[l]*/i) { $extension = 'html'; } elsif ($outfile) { $extension = 'txt'; } else { $extension = 'html'; } } # Set default filename of the output file $outfile ||= 'out.' . $extension; &logmsg('DEBUG', "Output '$extension' reports will be written to $outfile"); # Set default syslog ident name $ident ||= 'postgres'; # Set default pie percentage limit or fix value $pie_percentage_limit = 0 if ($pie_percentage_limit < 0); $pie_percentage_limit = 2 if ($pie_percentage_limit eq ''); $pie_percentage_limit = 100 if ($pie_percentage_limit > 100); # Set default download image format $img_format = lc($img_format); $img_format = 'jpeg' if ($img_format eq 'jpg'); $img_format = 'png' if ($img_format ne 'jpeg'); # Extract the output directory from outfile so that graphs will # be created in the same directory my @infs = fileparse($outfile); $outdir = $infs[1] . 
'/'; # Remove graph support if output is not html $graph = 0 unless ($extension eq 'html' or $extension eq 'binary' ); $graph = 0 if ($nograph); # Set some default values my $end_top = $top - 1; $queue_size ||= 1; $job_per_file ||= 1; if ($^O =~ /MSWin32|dos/i) { if ( ($queue_size > 1) || ($job_per_file > 1) ) { print STDERR "WARNING: parallel processing is not supported on this platform.\n"; $queue_size = 1; $job_per_file = 1; } } if ($extension eq 'tsung') { # Open filehandle my $fh = new IO::File ">$outfile"; if (not defined $fh) { die "FATAL: can't write to $outfile, $!\n"; } print $fh "\n"; $fh->close(); } else { # Test file creation before going to parse log my $tmpfh = new IO::File ">$outfile"; if (not defined $tmpfh) { die "FATAL: can't write to $outfile, $!\n"; } $tmpfh->close(); unlink($outfile) if (-e $outfile); } # -w and --disable-error can't go together if ($error_only && $disable_error) { die "FATAL: please choose between no event report and reporting events only.\n"; } # Set default search pattern for database and user name in log_line_prefix my $regex_prefix_dbname = qr/db=([^,]*)/; my $regex_prefix_dbuser = qr/user=([^,]*)/; # Loading excluded query from file if any if ($exclude_file) { open(IN, "$exclude_file") or die "FATAL: can't read file $exclude_file: $!\n"; my @exclq = ; close(IN); chomp(@exclq); map {s/\r//;} @exclq; foreach my $r (@exclq) { &check_regex($r, '--exclude-file'); } push(@exclude_query, @exclq); } # Testing regex syntax if ($#exclude_query >= 0) { foreach my $r (@exclude_query) { &check_regex($r, '--exclude-query'); } } # Loading included query from file if any if ($include_file) { open(IN, "$include_file") or die "FATAL: can't read file $include_file: $!\n"; my @exclq = ; close(IN); chomp(@exclq); map {s/\r//;} @exclq; foreach my $r (@exclq) { &check_regex($r, '--include-file'); } push(@include_query, @exclq); } # Testing regex syntax if ($#include_query >= 0) { foreach my $r (@include_query) { &check_regex($r, '--include-query'); } } my @action_regex = ( qr/^\s*(delete) from/is, qr/^\s*(insert) into/is, qr/^\s*(update) .*\bset\b/is, qr/^\s*(select) /is ); # Compile custom log line prefix prefix my @prefix_params = (); if ($log_line_prefix) { # Build parameters name that will be extracted from the prefix regexp @prefix_params = &build_log_line_prefix_regex(); &check_regex($log_line_prefix, '--prefix'); if ($format eq 'syslog') { $log_line_prefix = '^(...)\s+(\d+)\s(\d+):(\d+):(\d+)(?:\s[^\s]+)?\s([^\s]+)\s([^\s\[]+)\[(\d+)\]:(?:\s\[[^\]]+\])?\s\[(\d+)\-\d+\]\s*' . $log_line_prefix . '\s*(LOG|WARNING|ERROR|FATAL|PANIC|DETAIL|STATEMENT|HINT|CONTEXT):\s+(.*)'; $compiled_prefix = qr/$log_line_prefix/; unshift(@prefix_params, 't_month', 't_day', 't_hour', 't_min', 't_sec', 't_host', 't_ident', 't_pid', 't_session_line'); push(@prefix_params, 't_loglevel', 't_query'); } elsif ($format eq 'syslog2') { $format = 'syslog'; $log_line_prefix = '^(\d+)-(\d+)-(\d+)T\d+:\d+:\d+(?:.[^\s]+)?\s([^\s]+)\s([^\s\[]+)\[(\d+)\]:(?:\s\[[^\]]+\])?\s\[(\d+)\-\d+\]\s*' . $log_line_prefix . '\s*(LOG|WARNING|ERROR|FATAL|PANIC|DETAIL|STATEMENT|HINT|CONTEXT):\s+(.*)'; $compiled_prefix = qr/$log_line_prefix/; unshift(@prefix_params, 't_year', 't_month', 't_day', 't_hour', 't_min', 't_sec', 't_host', 't_ident', 't_pid', 't_session_line'); push(@prefix_params, 't_loglevel', 't_query'); } elsif ($format eq 'stderr') { $orphan_stderr_line = qr/$log_line_prefix/; $log_line_prefix = '^' . $log_line_prefix . 
'\s*(LOG|WARNING|ERROR|FATAL|PANIC|DETAIL|STATEMENT|HINT|CONTEXT):\s+(.*)'; $compiled_prefix = qr/$log_line_prefix/; push(@prefix_params, 't_loglevel', 't_query'); } } elsif ($format eq 'syslog') { $compiled_prefix = qr/^(...)\s+(\d+)\s(\d+):(\d+):(\d+)(?:\s[^\s]+)?\s([^\s]+)\s([^\s\[]+)\[(\d+)\]:(?:\s\[[^\]]+\])?\s\[(\d+)\-\d+\]\s*(.*?)\s*(LOG|WARNING|ERROR|FATAL|PANIC|DETAIL|STATEMENT|HINT|CONTEXT):\s+(.*)/; push(@prefix_params, 't_month', 't_day', 't_hour', 't_min', 't_sec', 't_host', 't_ident', 't_pid', 't_session_line', 't_logprefix', 't_loglevel', 't_query'); } elsif ($format eq 'syslog2') { $format = 'syslog'; $compiled_prefix = qr/^(\d+)-(\d+)-(\d+)T(\d+):(\d+):(\d+)(?:.[^\s]+)?\s([^\s]+)\s([^\s\[]+)\[(\d+)\]:(?:\s\[[^\]]+\])?\s\[(\d+)\-\d+\]\s*(.*?)\s*(LOG|WARNING|ERROR|FATAL|PANIC|DETAIL|STATEMENT|HINT|CONTEXT):\s+(.*)/; push(@prefix_params, 't_year', 't_month', 't_day', 't_hour', 't_min', 't_sec', 't_host', 't_ident', 't_pid', 't_session_line', 't_logprefix', 't_loglevel', 't_query'); } elsif ($format eq 'stderr') { $compiled_prefix = qr/^(\d+-\d+-\d+\s\d+:\d+:\d+)[\.\d]*(?: [A-Z\d]{3,6})?\s\[(\d+)\]:\s\[(\d+)\-\d+\]\s*(.*?)\s*(LOG|WARNING|ERROR|FATAL|PANIC|DETAIL|STATEMENT|HINT|CONTEXT):\s+(.*)/; push(@prefix_params, 't_timestamp', 't_pid', 't_session_line', 't_logprefix', 't_loglevel', 't_query'); $orphan_stderr_line = qr/^(\d+-\d+-\d+\s\d+:\d+:\d+)[\.\d]*(?: [A-Z\d]{3,6})?\s\[(\d+)\]:\s\[(\d+)\-\d+\]\s*(.*?)\s*/; } sub check_regex { my ($pattern, $varname) = @_; eval {m/$pattern/i;}; if ($@) { die "FATAL: '$varname' invalid regex '$pattern', $!\n"; } } # Check start/end date time if ($from) { if ($from !~ /^(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})$/) { die "FATAL: bad format for begin datetime, should be yyyy-mm-dd hh:mm:ss\n"; } } if ($to) { if ($to !~ /^(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})$/) { die "FATAL: bad format for ending datetime, should be yyyy-mm-dd hh:mm:ss\n"; } } # Stores the last parsed line from log file to allow incremental parsing my $LAST_LINE = ''; # Set the level of the data aggregator, can be minute, hour or day follow the # size of the log file. 
my $LEVEL = 'hour'; # Month names my %month_abbr = ( 'Jan' => '01', 'Feb' => '02', 'Mar' => '03', 'Apr' => '04', 'May' => '05', 'Jun' => '06', 'Jul' => '07', 'Aug' => '08', 'Sep' => '09', 'Oct' => '10', 'Nov' => '11', 'Dec' => '12' ); my %abbr_month = ( '01' => 'Jan', '02' => 'Feb', '03' => 'Mar', '04' => 'Apr', '05' => 'May', '06' => 'Jun', '07' => 'Jul', '08' => 'Aug', '09' => 'Sep', '10' => 'Oct', '11' => 'Nov', '12' => 'Dec' ); # Keywords variable my @pg_keywords = qw( ALL ANALYSE ANALYZE AND ANY ARRAY AS ASC ASYMMETRIC AUTHORIZATION BINARY BOTH CASE CAST CHECK COLLATE COLLATION COLUMN CONCURRENTLY CONSTRAINT CREATE CROSS CURRENT_DATE CURRENT_ROLE CURRENT_TIME CURRENT_TIMESTAMP CURRENT_USER DEFAULT DEFERRABLE DESC DISTINCT DO ELSE END EXCEPT FALSE FETCH FOR FOREIGN FREEZE FROM FULL GRANT GROUP HAVING ILIKE IN INITIALLY INNER INTERSECT INTO IS ISNULL JOIN LEADING LEFT LIKE LIMIT LOCALTIME LOCALTIMESTAMP NATURAL NOT NOTNULL NULL ON ONLY OPEN OR ORDER OUTER OVER OVERLAPS PLACING PRIMARY REFERENCES RETURNING RIGHT SELECT SESSION_USER SIMILAR SOME SYMMETRIC TABLE THEN TO TRAILING TRUE UNION UNIQUE USER USING VARIADIC VERBOSE WHEN WHERE WINDOW WITH ); # Highlight variables my @KEYWORDS1 = qw( ALTER ADD AUTO_INCREMENT BETWEEN BY BOOLEAN BEGIN CHANGE COLUMNS COMMIT COALESCE CLUSTER COPY DATABASES DATABASE DATA DELAYED DESCRIBE DELETE DROP ENCLOSED ESCAPED EXISTS EXPLAIN FIELDS FIELD FLUSH FUNCTION GREATEST IGNORE INDEX INFILE INSERT IDENTIFIED IF INHERIT KEYS KILL KEY LINES LOAD LOCAL LOCK LOW_PRIORITY LANGUAGE LEAST LOGIN MODIFY NULLIF NOSUPERUSER NOCREATEDB NOCREATEROLE OPTIMIZE OPTION OPTIONALLY OUTFILE OWNER PROCEDURE PROCEDURAL READ REGEXP RENAME RETURN REVOKE RLIKE ROLE ROLLBACK SHOW SONAME STATUS STRAIGHT_JOIN SET SEQUENCE TABLES TEMINATED TRUNCATE TEMPORARY TRIGGER TRUSTED UNLOCK USE UPDATE UNSIGNED VALUES VARIABLES VIEW VACUUM WRITE ZEROFILL XOR ABORT ABSOLUTE ACCESS ACTION ADMIN AFTER AGGREGATE ALSO ALWAYS ASSERTION ASSIGNMENT AT ATTRIBUTE BACKWARD BEFORE BIGINT CACHE CALLED CASCADE CASCADED CATALOG CHAIN CHARACTER CHARACTERISTICS CHECKPOINT CLOSE COMMENT COMMENTS COMMITTED CONFIGURATION CONNECTION CONSTRAINTS CONTENT CONTINUE CONVERSION COST CSV CURRENT CURSOR CYCLE DAY DEALLOCATE DEC DECIMAL DECLARE DEFAULTS DEFERRED DEFINER DELIMITER DELIMITERS DICTIONARY DISABLE DISCARD DOCUMENT DOMAIN DOUBLE EACH ENABLE ENCODING ENCRYPTED ENUM ESCAPE EXCLUDE EXCLUDING EXCLUSIVE EXECUTE EXTENSION EXTERNAL FIRST FLOAT FOLLOWING FORCE FORWARD FUNCTIONS GLOBAL GRANTED HANDLER HEADER HOLD HOUR IDENTITY IMMEDIATE IMMUTABLE IMPLICIT INCLUDING INCREMENT INDEXES INHERITS INLINE INOUT INPUT INSENSITIVE INSTEAD INT INTEGER INVOKER ISOLATION LABEL LARGE LAST LC_COLLATE LC_CTYPE LEAKPROOF LEVEL LISTEN LOCATION LOOP MAPPING MATCH MAXVALUE MINUTE MINVALUE MODE MONTH MOVE NAMES NATIONAL NCHAR NEXT NO NONE NOTHING NOTIFY NOWAIT NULLS OBJECT OF OFF OIDS OPERATOR OPTIONS OUT OWNED PARSER PARTIAL PARTITION PASSING PASSWORD PLANS PRECEDING PRECISION PREPARE PREPARED PRESERVE PRIOR PRIVILEGES QUOTE RANGE REAL REASSIGN RECHECK RECURSIVE REF REINDEX RELATIVE RELEASE REPEATABLE REPLICA RESET RESTART RESTRICT RETURNS ROW ROWS RULE SAVEPOINT SCHEMA SCROLL SEARCH SECOND SECURITY SEQUENCES SERIALIZABLE SERVER SESSION SETOF SHARE SIMPLE SMALLINT SNAPSHOT STABLE STANDALONE START STATEMENT STATISTICS STORAGE STRICT SYSID SYSTEM TABLESPACE TEMP TEMPLATE TRANSACTION TREAT TYPE TYPES UNBOUNDED UNCOMMITTED UNENCRYPTED UNKNOWN UNLISTEN UNLOGGED UNTIL VALID VALIDATE VALIDATOR VALUE VARYING VOLATILE WHITESPACE WITHOUT WORK WRAPPER 
XMLATTRIBUTES XMLCONCAT XMLELEMENT XMLEXISTS XMLFOREST XMLPARSE XMLPI XMLROOT XMLSERIALIZE YEAR YES ZONE ); foreach my $k (@pg_keywords) { push(@KEYWORDS1, $k) if (!grep(/^$k$/i, @KEYWORDS1)); } my @KEYWORDS2 = ( 'ascii', 'age', 'bit_length', 'btrim', 'char_length', 'character_length', 'convert', 'chr', 'current_date', 'current_time', 'current_timestamp', 'count', 'decode', 'date_part', 'date_trunc', 'encode', 'extract', 'get_byte', 'get_bit', 'initcap', 'isfinite', 'interval', 'justify_hours', 'justify_days', 'lower', 'length', 'lpad', 'ltrim', 'localtime', 'localtimestamp', 'md5', 'now', 'octet_length', 'overlay', 'position', 'pg_client_encoding', 'quote_ident', 'quote_literal', 'repeat', 'replace', 'rpad', 'rtrim', 'substring', 'split_part', 'strpos', 'substr', 'set_byte', 'set_bit', 'trim', 'to_ascii', 'to_hex', 'translate', 'to_char', 'to_date', 'to_timestamp', 'to_number', 'timeofday', 'upper', ); my @KEYWORDS3 = ('STDIN', 'STDOUT'); my %SYMBOLS = ( '=' => '=', '<' => '<', '>' => '>', '\|' => '|', ',' => ',', '\.' => '.', '\+' => '+', '\-' => '-', '\*' => '*', '\/' => '/', '!=' => '!=' ); my @BRACKETS = ('(', ')'); map {$_ = quotemeta($_)} @BRACKETS; # Where statistics are stored my %overall_stat = (); my @top_slowest = (); my %normalyzed_info = (); my %error_info = (); my %logs_type = (); my %per_hour_info = (); my %per_minute_info = (); my %lock_info = (); my %tempfile_info = (); my %connection_info = (); my %database_info = (); my %application_info = (); my %session_info = (); my %conn_received = (); my %checkpoint_info = (); my %restartpoint_info = (); my %autovacuum_info = (); my %autoanalyze_info = (); my @graph_values = (); my %cur_info = (); my %cur_temp_info = (); my %cur_lock_info = (); my $nlines = 0; my %last_line = (); our %saved_last_line = (); my %tsung_session = (); my @top_locked_info = (); my @top_tempfile_info = (); my $t0 = Benchmark->new; # Reading last line parsed if ($last_parsed && -e $last_parsed) { if (open(IN, "$last_parsed")) { my $line = ; close(IN); ($saved_last_line{datetime}, $saved_last_line{orig}) = split(/\t/, $line, 2); } else { die "FATAL: can't read last parsed line from $last_parsed, $!\n"; } } $tmp_last_parsed = 'tmp_' . 
$last_parsed if ($last_parsed); # Main loop reading log files my $global_totalsize = 0; my @given_log_files = ( @log_files ); # log files must be erase when loading stats from binary format if ($format eq 'binary') { $queue_size = 1; $job_per_file = 1; @log_files = (); } my $pipe; # Start parsing all given files using multiprocess if ( ($queue_size > 1) || ($job_per_file > 1) ) { # Number of running process my $child_count = 0; # Set max number of parallel process my $parallel_process = $queue_size; if ($job_per_file > 1) { $parallel_process = $job_per_file; } # Store total size of the log files foreach my $logfile ( @given_log_files ) { $global_totalsize += &get_log_file($logfile); } # Open a pipe for interprocess communication my $reader = new IO::Handle; my $writer = new IO::Handle; $pipe = IO::Pipe->new($reader, $writer); $writer->autoflush(1); # Fork the logger process if ($progress) { spawn sub { &multiprocess_progressbar($global_totalsize); }; } # Parse each log file following the multiprocess mode chosen (-j or -J) foreach my $logfile ( @given_log_files ) { while ($child_count >= $parallel_process) { my $kid = waitpid(-1, WNOHANG); if ($kid > 0) { $child_count--; delete $RUNNING_PIDS{$kid}; } usleep(500000); } # Do not use split method with compressed files if ( ($queue_size > 1) && ($logfile !~ /\.(gz|bz2|zip)/i) ) { # Create multiple process to parse one log file by chunks of data my @chunks = &split_logfile($logfile); for (my $i = 0; $i < $#chunks; $i++) { while ($child_count >= $parallel_process) { my $kid = waitpid(-1, WNOHANG); if ($kid > 0) { $child_count--; delete $RUNNING_PIDS{$kid}; } usleep(500000); } push(@tempfiles, [ tempfile('tmp_pgbadgerXXXX', SUFFIX => '.bin', DIR => $TMP_DIR, UNLINK => 1 ) ]); spawn sub { &process_file($logfile, $tempfiles[-1]->[0], $chunks[$i], $chunks[$i+1]); }; $child_count++; } } else { # Start parsing one file per parallel process push(@tempfiles, [ tempfile('tmp_pgbadgerXXXX', SUFFIX => '.bin', DIR => $TMP_DIR, UNLINK => 1 ) ]); spawn sub { &process_file($logfile, $tempfiles[-1]->[0]); }; $child_count++; } last if ($interrupt); } my $minproc = 1; $minproc = 0 if (!$progress); # Wait for all child dies less the logger while (scalar keys %RUNNING_PIDS > $minproc) { my $kid = waitpid(-1, WNOHANG); if ($kid > 0) { delete $RUNNING_PIDS{$kid}; } usleep(500000); } # Terminate the process logger foreach my $k (keys %RUNNING_PIDS) { kill(10, $k); %RUNNING_PIDS = (); } # Load all data gathered by all the differents processes &init_stats_vars(); foreach my $f (@tempfiles) { next if (!-e "$f->[1]" || -z "$f->[1]"); my $fht = new IO::File; $fht->open("< $f->[1]") or die "FATAL: can't open file $f->[1], $!\n"; &load_stats($fht); $fht->close(); } # Get last line parsed from all process if ($last_parsed) { if (open(IN, "$tmp_last_parsed") ) { while (my $line = ) { chomp($line); my ($d, $l) = split(/\t/, $line, 2); if (!$last_line{datetime} || ($d gt $last_line{datetime})) { $last_line{datetime} = $d; $last_line{orig} = $l; } } close(IN); } unlink("$tmp_last_parsed"); } } else { # Multiprocessing disabled, parse log files one by one foreach my $logfile ( @given_log_files ) { last if (&process_file($logfile)); } } # Save last line parsed if ($last_parsed && scalar keys %last_line) { if (open(OUT, ">$last_parsed")) { print OUT "$last_line{datetime}\t$last_line{orig}\n"; close(OUT); } else { &logmsg('ERROR', "can't save last parsed line into $last_parsed, $!"); } } my $t1 = Benchmark->new; my $td = timediff($t1, $t0); &logmsg('DEBUG', "the log statistics 
gathering took:" . timestr($td)); &logmsg('LOG', "Ok, generating $extension report..."); # Open filehandle my $fh = undef; if ($extension ne 'tsung') { $fh = new IO::File ">$outfile"; if (not defined $fh) { die "FATAL: can't write to $outfile, $!\n"; } if (($extension eq 'text') || ($extension eq 'txt')) { if ($error_only) { &dump_error_as_text(); } else { &dump_as_text(); } } elsif ($extension eq 'binary') { &dump_as_binary($fh); } else { # Create instance to prettify SQL query if (!$noprettify) { $sql_prettified = SQL::Beautify->new(keywords => \@pg_keywords); } if ($error_only) { &dump_error_as_html(); } else { &dump_as_html(); } } $fh->close; } else { # Open filehandle $fh = new IO::File ">>$outfile"; if (not defined $fh) { die "FATAL: can't write to $outfile, $!\n"; } print $fh "\n"; $fh->close(); } my $t2 = Benchmark->new; $td = timediff($t2, $t1); &logmsg('DEBUG', "building reports took:" . timestr($td)); $td = timediff($t2, $t0); &logmsg('DEBUG', "the total execution time took:" . timestr($td)); exit 0; #------------------------------------------------------------------------------- # Show pgBadger command line usage sub usage { print qq{ Usage: pgbadger [options] logfile [...] PostgreSQL log analyzer with fully detailed reports and graphs. Arguments: logfile can be a single log file, a list of files, or a shell command returning a list of files. If you want to pass log content from stdin use - as filename. Note that input from stdin will not work with csvlog. Options: -a | --average minutes : number of minutes to build the average graphs of queries and connections. -b | --begin datetime : start date/time for the data to be parsed in log. -c | --dbclient host : only report on entries for the given client host. -C | --nocomment : remove comments like /* ... */ from queries. -d | --dbname database : only report on entries for the given database. -e | --end datetime : end date/time for the data to be parsed in log. -f | --format logtype : possible values: syslog,stderr,csv. Default: stderr. -G | --nograph : disable graphs on HTML output. Enable by default. -h | --help : show this message and exit. -i | --ident name : programname used as syslog ident. Default: postgres -j | --jobs number : number of jobs to run at same time. Default is 1, run as single process. -l | --last-parsed file: allow incremental log parsing by registering the last datetime and line parsed. Useful if you want to watch errors since last run or if you want one report per day with a log rotated each week. -m | --maxlength size : maximum length of a query, it will be restricted to the given size. Default: no truncate -n | --nohighlight : disable SQL code highlighting. -N | --appname name : only report on entries for given application name -o | --outfile filename: define the filename for the output. Default depends on the output format: out.html, out.txt or out.tsung. To dump output to stdout use - as filename. -p | --prefix string : give here the value of your custom log_line_prefix defined in your postgresql.conf. Only use it if you aren't using one of the standard prefixes specified in the pgBadger documentation, such as if your prefix includes additional variables like client ip or application name. See examples below. -P | --no-prettify : disable SQL queries prettify formatter. -q | --quiet : don't print anything to stdout, even not a progress bar. -s | --sample number : number of query samples to store/display. Default: 3 -S | --select-only : use it if you want to report select queries only. 
-t | --top number : number of queries to store/display. Default: 20 -T | --title string : change title of the HTML page report. -u | --dbuser username : only report on entries for the given user. -U | --exclude-user username : exclude entries for the specified user from report. -v | --verbose : enable verbose or debug mode. Disabled by default. -V | --version : show pgBadger version and exit. -w | --watch-mode : only report errors just like logwatch could do. -x | --extension : output format. Values: text, html or tsung. Default: html -z | --zcat exec_path : set the full path to the zcat program. Use it if zcat or bzcat or unzip is not on your path. --pie-limit num : pie data lower than num% will show a sum instead. --exclude-query regex : any query matching the given regex will be excluded from the report. For example: "^(VACUUM|COMMIT)" You can use this option multiple times. --exclude-file filename: path of the file which contains all the regex to use to exclude queries from the report. One regex per line. --include-query regex : any query that does not match the given regex will be excluded from the report. For example: "(table_1|table_2)" You can use this option multiple times. --include-file filename: path of the file which contains all the regex of the queries to include from the report. One regex per line. --disable-error : do not generate error report. --disable-hourly : do not generate hourly report. --disable-type : do not generate query type report. --disable-query : do not generate query reports (slowest, most frequent, ...). --disable-session : do not generate session report. --disable-connection : do not generate connection report. --disable-lock : do not generate lock report. --disable-temporary : do not generate temporary report. --disable-checkpoint : do not generate checkpoint/restartpoint report. --disable-autovacuum : do not generate autovacuum report. Examples: pgbadger /var/log/postgresql.log pgbadger /var/log/postgres.log.2.gz /var/log/postgres.log.1.gz \ /var/log/postgres.log pgbadger /var/log/postgresql/postgresql-2012-05-* pgbadger --exclude-query="^(COPY|COMMIT)" /var/log/postgresql.log pgbadger -b "2012-06-25 10:56:11" -e "2012-06-25 10:59:11" \ /var/log/postgresql.log cat /var/log/postgres.log | pgbadger - # log prefix with stderr log output perl pgbadger --prefix '%t [%p]: [%l-1] user=%u,db=%d,client=%h' \ /pglog/postgresql-2012-08-21* perl pgbadger --prefix '%m %u@%d %p %r %a : ' /pglog/postgresql.log # Log line prefix with syslog log output perl pgbadger --prefix 'user=%u,db=%d,client=%h,appname=%a' \ /pglog/postgresql-2012-08-21* # Use my 8 CPUs to parse my 10GB file faster, really faster perl pgbadger -j 8 /pglog/postgresql-9.1-main.log Generate Tsung sessions XML file with select queries only: perl pgbadger -S -o sessions.tsung --prefix '%t [%p]: [%l-1] user=%u,db=%d ' /pglog/postgresql-9.1.log Reporting errors every week by cron job: 30 23 * * 1 /usr/bin/pgbadger -q -w /var/log/postgresql.log -o /var/reports/pg_errors.html Generate report every week using incremental behavior: 0 4 * * 1 /usr/bin/pgbadger -q `find /var/log/ -mtime -7 -name "postgresql.log*"` \ -o /var/reports/pg_errors-`date +%F`.html -l /var/reports/pgbadger_incremental_file.dat This supposes that your log file and HTML report are also rotated every week. 
}; exit 0; } sub init_stats_vars { # Empty where statistics are stored %overall_stat = (); @top_slowest = (); %normalyzed_info = (); %error_info = (); %logs_type = (); %per_hour_info = (); %per_minute_info = (); %lock_info = (); %tempfile_info = (); %connection_info = (); %database_info = (); %application_info = (); %session_info = (); %conn_received = (); %checkpoint_info = (); %restartpoint_info = (); %autovacuum_info = (); %autoanalyze_info = (); @graph_values = (); %cur_info = (); $nlines = 0; %tsung_session = (); } #### # Main function called per each parser process #### sub multiprocess_progressbar { my $totalsize = shift; &logmsg('DEBUG', "Starting progressbar writer process"); $0 = 'pgbadger logger'; # Terminate the process when we doesn't read the complete file but must exit local $SIG{USR1} = sub { print STDERR "\n"; exit 0; }; my $timeout = 3; my $cursize = 0; my $nqueries = 0; my $nerrors = 0; $pipe->reader(); while (my $r = <$pipe>) { chomp($r); my @infos = split(/\s+/, $r); $cursize += $infos[0]; $nqueries += $infos[1]; $nerrors += $infos[2]; $cursize = $totalsize if ($cursize > $totalsize); print STDERR &progress_bar($cursize, $totalsize, 25, '=', $nqueries, $nerrors); last if ($cursize >= $totalsize); } print STDERR "\n"; exit 0; } #### # Main function called per each parser process #### sub process_file { my ($logfile, $tmpoutfile, $start_offset, $stop_offset) = @_; my $old_queries_count = 0; my $old_errors_count = 0; my $current_offset = $start_offset || 0; my $getout = 0; $0 = 'pgbadger parser'; &init_stats_vars() if ($tmpoutfile); &logmsg('DEBUG', "Starting to parse log file: $logfile"); my $terminate = 0; local $SIG{INT} = sub { $terminate = 1 }; local $SIG{TERM} = sub { $terminate = 1 }; my $curdate = localtime(time); $pipe->writer() if (defined $pipe); # Syslog does not have year information, so take care of year overlapping my ($gsec, $gmin, $ghour, $gmday, $gmon, $gyear, $gwday, $gyday, $gisdst) = localtime(time); $gyear += 1900; my $CURRENT_DATE = $gyear . sprintf("%02d", $gmon + 1) . sprintf("%02d", $gmday); my $cursize = 0; # Get file handle and size of the file my ($lfile, $totalsize) = &get_log_file($logfile); if ($stop_offset > 0) { $totalsize = $stop_offset - $start_offset; } &logmsg('DEBUG', "Starting reading file $logfile..."); if ($format eq 'csv') { require Text::CSV_XS; my $csv = Text::CSV_XS->new({binary => 1, eol => $/}); # Parse csvlog lines while (my $row = $csv->getline($lfile)) { # We received a signal last if ($terminate); # Set progress statistics $cursize += length(join(',', @$row)); $nlines++; if (!$tmpoutfile) { if ($progress && (($nlines % $NUMPROGRESS) == 0)) { if ($totalsize) { print STDERR &progress_bar($cursize, $totalsize, 25, '='); } else { print STDERR "."; } } } else { if ($progress && (($nlines % $NUMPROGRESS) == 0)) { $pipe->print("$cursize " . ($overall_stat{'queries_number'} - $old_queries_count) . " " . ($overall_stat{'errors_number'} - $old_errors_count) . 
"\n"); $old_queries_count = $overall_stat{'queries_number'}; $old_errors_count = $overall_stat{'errors_number'}; $cursize = 0; } } # Process only relevant lines next if ($row->[11] !~ /^(LOG|WARNING|ERROR|FATAL|PANIC|DETAIL|STATEMENT|HINT|CONTEXT)$/); # Extract the date $row->[0] =~ m/^(\d+)-(\d+)-(\d+)\s+(\d+):(\d+):(\d+)\.(\d+)/; my $milli = $7 || 0; ($prefix_vars{'t_year'}, $prefix_vars{'t_month'}, $prefix_vars{'t_day'}, $prefix_vars{'t_hour'}, $prefix_vars{'t_min'}, $prefix_vars{'t_sec'}) = ($1, $2, $3, $4, $5, $6); $prefix_vars{'t_timestamp'} = "$1-$2-$3 $4:$5:$6"; # Skip unwanted lines next if ($from && ($from gt $prefix_vars{'t_timestamp'})); if ($to && ($to lt $prefix_vars{'t_timestamp'})) { if ($tmpoutfile) { $pipe->print("$cursize " . ($overall_stat{'queries_number'} - $old_queries_count) . " " . ($overall_stat{'errors_number'} - $old_errors_count) . "\n"); $old_queries_count = $overall_stat{'queries_number'}; $old_errors_count = $overall_stat{'errors_number'}; $cursize = 0; } $getout = 1; last; } # Jump to the last line parsed if required next if (!&check_incremental_position($prefix_vars{'t_timestamp'}, join(',', @$row))); # Store the current timestamp of the log line &store_current_timestamp($prefix_vars{'t_timestamp'}); # Set query parameters as global variables $prefix_vars{'t_dbuser'} = $row->[1] || ''; $prefix_vars{'t_dbname'} = $row->[2] || ''; $prefix_vars{'t_appname'} = $row->[22] || ''; $prefix_vars{'t_client'} = $row->[4] || ''; $prefix_vars{'t_client'} =~ s/:.*//; $prefix_vars{'t_host'} = 'csv'; $prefix_vars{'t_pid'} = $row->[3]; $prefix_vars{'t_session_line'} = $row->[5]; $prefix_vars{'t_session_line'} =~ s/\..*//; $prefix_vars{'t_loglevel'} = $row->[11]; $prefix_vars{'t_query'} = $row->[13]; # Set ERROR additional informations $prefix_vars{'t_detail'} = $row->[14]; $prefix_vars{'t_hint'} = $row->[15]; $prefix_vars{'t_context'} = $row->[18]; $prefix_vars{'t_statement'} = $row->[19]; # Check if the log line should be excluded from the report if (&validate_log_line($prefix_vars{'t_pid'})) { # Parse the query now &parse_query(); &store_queries($prefix_vars{'t_pid'}); delete $cur_info{$prefix_vars{'t_pid'}}; } } if (!$getout) { $csv->eof or warn "FATAL: cannot use CSV, " . $csv->error_diag() . "\n"; } } elsif ($format eq 'binary') { &load_stats($lfile); } else { # Format is not CSV. my $time_pattern = qr/(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})/; my $cur_pid = ''; my @matches = (); my $goon = 0; if ($start_offset) { $lfile->seek($start_offset, 0); } while (my $line = <$lfile>) { # We received a signal last if ($terminate); $cursize += length($line); $current_offset += length($line); chomp($line); $line =~ s/\r//; $nlines++; next if (!$line); if (!$tmpoutfile) { if ($progress && (($nlines % $NUMPROGRESS) == 0)) { if ($totalsize) { if ($stop_offset > 0) { print STDERR &progress_bar($cursize - $start_offset, $stop_offset, 25, '='); } else { print STDERR &progress_bar($cursize, $totalsize, 25, '='); } } else { print STDERR "."; } } } else { if ($progress && (($nlines % $NUMPROGRESS) == 0)) { $pipe->print("$cursize " . ($overall_stat{'queries_number'} - $old_queries_count) . " " . ($overall_stat{'errors_number'} - $old_errors_count) . 
"\n"); $old_queries_count = $overall_stat{'queries_number'}; $old_errors_count = $overall_stat{'errors_number'}; $cursize = 0; } } %prefix_vars = (); # Parse syslog lines if ($format =~ /syslog/) { @matches = ($line =~ $compiled_prefix); if ($#matches >= 0) { for (my $i = 0 ; $i <= $#prefix_params ; $i++) { $prefix_vars{$prefix_params[$i]} = $matches[$i]; } # skip non postgresql lines next if ($prefix_vars{'t_ident'} ne $ident); # Standard syslog format does not have year information, months are # three letters and day are not always with 2 digit. if ($prefix_vars{'t_month'} !~ /\d/) { $prefix_vars{'t_year'} = $gyear; $prefix_vars{'t_day'} = sprintf("%02d", $prefix_vars{'t_day'}); $prefix_vars{'t_month'} = $month_abbr{$prefix_vars{'t_month'}}; # Take care of year overlapping if ("$prefix_vars{'t_year'}$prefix_vars{'t_month'}$prefix_vars{'t_day'}" > $CURRENT_DATE) { $prefix_vars{'t_year'} = substr($CURRENT_DATE, 0, 4) - 1; } } $prefix_vars{'t_timestamp'} = "$prefix_vars{'t_year'}-$prefix_vars{'t_month'}-$prefix_vars{'t_day'} $prefix_vars{'t_hour'}:$prefix_vars{'t_min'}:$prefix_vars{'t_sec'}"; # Skip unwanted lines next if ($from && ($from gt $prefix_vars{'t_timestamp'})); if ($to && ($to lt $prefix_vars{'t_timestamp'})) { if ($tmpoutfile) { $pipe->print("$cursize " . ($overall_stat{'queries_number'} - $old_queries_count) . " " . ($overall_stat{'errors_number'} - $old_errors_count) . "\n"); $old_queries_count = $overall_stat{'queries_number'}; $old_errors_count = $overall_stat{'errors_number'}; $cursize = 0; } $getout = 1; last; } # Jump to the last line parsed if required next if (!&check_incremental_position($prefix_vars{'t_timestamp'}, $line)); $cur_pid = $prefix_vars{'t_pid'}; $goon = 1; # Store the current timestamp of the log line &store_current_timestamp($prefix_vars{'t_timestamp'}); # Extract information from log line prefix if (!$log_line_prefix) { &parse_log_prefix($prefix_vars{'t_logprefix'}); } # Check if the log line should be excluded from the report if (&validate_log_line($prefix_vars{'t_pid'})) { # Process the log line &parse_query(); } } elsif ($goon && ($line =~ $other_syslog_line)) { $cur_pid = $8; my $t_query = $10; $t_query = $11 if ($format eq 'syslog-ng'); $t_query =~ s/#011/\t/g; next if ($t_query eq "\t"); if ($cur_info{$cur_pid}{vacuum} && ($t_query =~ /^\t(pages|tuples|buffer usage|avg read rate|system usage):/)) { if ($t_query =~ /^\t(pages|tuples): (\d+) removed, (\d+) remain/) { $autovacuum_info{tables}{$cur_info{$cur_pid}{vacuum}}{$1}{removed} += $2; } next; } elsif ( $cur_info{$cur_pid}{parameters} && (($t_query =~ /[,\s]*\$(\d+)\s=\s/) || ($t_query =~ /^('[^']*')$/)) ) { # stores bind parameters if any $cur_info{$cur_pid}{parameters} .= " $t_query"; next; } if ($cur_info{$cur_pid}{statement}) { $cur_info{$cur_pid}{statement} .= "\n" . $t_query; } elsif ($cur_info{$cur_pid}{context}) { $cur_info{$cur_pid}{context} .= "\n" . $t_query; } elsif ($cur_info{$cur_pid}{detail}) { $cur_info{$cur_pid}{detail} .= "\n" . $t_query; } else { $cur_info{$cur_pid}{query} .= "\n" . $t_query; } # Collect orphans lines of multiline queries } elsif ($cur_pid && ($line !~ $orphan_syslog_line)) { if ($cur_info{$cur_pid}{statement}) { $cur_info{$cur_pid}{statement} .= "\n" . $line; } elsif ($cur_info{$cur_pid}{context}) { $cur_info{$cur_pid}{context} .= "\n" . $line; } elsif ($cur_info{$cur_pid}{detail}) { $cur_info{$cur_pid}{detail} .= "\n" . $line; } else { $cur_info{$cur_pid}{query} .= "\n" . 
$line; } } else { &logmsg('DEBUG', "Unknown syslog line format: $line"); } } elsif ($format eq 'stderr') { @matches = ($line =~ $compiled_prefix); if ($#matches >= 0) { for (my $i = 0 ; $i <= $#prefix_params ; $i++) { $prefix_vars{$prefix_params[$i]} = $matches[$i]; } if (!$prefix_vars{'t_timestamp'} && $prefix_vars{'t_mtimestamp'}) { $prefix_vars{'t_timestamp'} = $prefix_vars{'t_mtimestamp'}; } elsif (!$prefix_vars{'t_timestamp'} && $prefix_vars{'t_session_timestamp'}) { $prefix_vars{'t_timestamp'} = $prefix_vars{'t_session_timestamp'}; } ($prefix_vars{'t_year'}, $prefix_vars{'t_month'}, $prefix_vars{'t_day'}, $prefix_vars{'t_hour'}, $prefix_vars{'t_min'}, $prefix_vars{'t_sec'}) = ($prefix_vars{'t_timestamp'} =~ $time_pattern); # Skip unwanted lines next if ($from && ($from gt $prefix_vars{'t_timestamp'})); if ($to && ($to lt $prefix_vars{'t_timestamp'})) { if ($tmpoutfile) { $pipe->print("$cursize " . ($overall_stat{'queries_number'} - $old_queries_count) . " " . ($overall_stat{'errors_number'} - $old_errors_count) . "\n"); $old_queries_count = $overall_stat{'queries_number'}; $old_errors_count = $overall_stat{'errors_number'}; $cursize = 0; } $getout = 1; last; } # Jump to the last line parsed if required next if (!&check_incremental_position($prefix_vars{'t_timestamp'}, $line)); $cur_pid = $prefix_vars{'t_pid'}; # Store the current timestamp of the log line &store_current_timestamp($prefix_vars{'t_timestamp'}); # Extract information from log line prefix if (!$log_line_prefix) { &parse_log_prefix($prefix_vars{'t_logprefix'}); } # Check if the log line should be excluded from the report if (&validate_log_line($prefix_vars{'t_pid'})) { $prefix_vars{'t_host'} = 'stderr'; # Process the log line &parse_query(); } # Collect additional query information } elsif ($cur_pid && ($line !~ $orphan_stderr_line)) { if ($cur_info{$cur_pid}{vacuum} && ($line =~ /^\t(pages|tuples|buffer usage|avg read rate|system usage):/)) { if ($line =~ /^\t(pages|tuples): (\d+) removed, (\d+) remain/) { $autovacuum_info{tables}{$cur_info{$cur_pid}{vacuum}}{$1}{removed} += $2; } next; } elsif ( $cur_info{$cur_pid}{parameters} && (($line =~ /[,\s]*\$(\d+)\s=\s/) || ($line =~ /^'[^']*'$/)) ) { # stores bind parameters if any $cur_info{$cur_pid}{parameters} .= " $line"; next; } if (exists $cur_info{$cur_pid}{statement}) { $cur_info{$cur_pid}{statement} .= "\n" . $line; } elsif (exists $cur_info{$cur_pid}{context}) { $cur_info{$cur_pid}{context} .= "\n" . $line; } elsif (exists $cur_info{$cur_pid}{detail}) { $cur_info{$cur_pid}{detail} .= "\n" . $line; } else { $cur_info{$cur_pid}{query} .= "\n" . $line; } # Collect orphans lines of multiline queries } elsif ($cur_pid && ($cur_info{$cur_pid}{query})) { $cur_info{$cur_pid}{detail} .= "\n" . 
$line; } } else { # unknown format &logmsg('DEBUG', "Unknown line format: $line"); } last if (($stop_offset > 0) && ($current_offset > $stop_offset)); } } close $lfile; # Get stats from all pending temporary storage foreach my $pid (sort {$cur_info{$a}{date} <=> $cur_info{$b}{date}} keys %cur_info) { &store_queries($pid); } if ($extension eq 'tsung') { foreach my $pid (sort {$a <=> $b} keys %tsung_session) { &store_tsung_session($pid); } } if ($progress && !$getout) { if (!$tmpoutfile) { if ($totalsize) { if (($stop_offset > 0) && ($format ne 'csv')) { print STDERR &progress_bar($cursize - $start_offset, $stop_offset, 25, '=',$overall_stat{'queries_number'},$overall_stat{'errors_number'}); } elsif ($extension eq 'tsung') { print STDERR &progress_bar($cursize, $totalsize, 25, '=', $logfile); } else { print STDERR &progress_bar($cursize, $totalsize, 25, '=', $overall_stat{'queries_number'},$overall_stat{'errors_number'}); } print STDERR "\n"; } } else { $pipe->print("$cursize " . ($overall_stat{'queries_number'} - $old_queries_count) . " " . ($overall_stat{'errors_number'} - $old_errors_count) . "\n"); } } %cur_info = (); if ($tmpoutfile) { &dump_as_binary($tmpoutfile); $tmpoutfile->close(); } # Inform the parent that it should stop parsing other files if ($getout) { kill(12, $parent_pid); } # Save last line into temporary file if ($last_parsed && scalar keys %last_line) { if (open(OUT, ">>$tmp_last_parsed")) { flock(OUT, 2) || return $getout; print OUT "$last_line{datetime}\t$last_line{orig}\n"; close(OUT); } else { &logmsg('ERROR', "can't save last parsed line into $last_parsed, $!"); } } return $getout; } # Store the current timestamp of the log line sub store_current_timestamp { my $t_timestamp = shift; $prefix_vars{'t_date'} = $t_timestamp; $prefix_vars{'t_date'} =~ s/\D+//g; if (!$overall_stat{'first_log_ts'} || ($overall_stat{'first_log_ts'} gt $t_timestamp)) { $overall_stat{'first_log_ts'} = $t_timestamp; } if (!$overall_stat{'last_log_ts'} || ($overall_stat{'last_log_ts'} lt $t_timestamp)) { $overall_stat{'last_log_ts'} = $t_timestamp; } } # Method used to check if we have already reach the last parsing position in incremental mode # This position should have been saved in the incremental file and read in the $last_parsed at # start up. 
sub check_incremental_position { my ($cur_date, $line) = @_; if ($last_parsed) { if ($saved_last_line{datetime}) { if ($cur_date lt $saved_last_line{datetime}) { return 0; } elsif (!$last_line{datetime} && ($cur_date eq $saved_last_line{datetime})) { return 0 if ($line ne $saved_last_line{orig}); } } $last_line{datetime} = $cur_date; $last_line{orig} = $line; } return 1; } # Display message following the log level sub logmsg { my ($level, $str) = @_; return if ($quiet && ($level ne 'FATAL')); return if (!$debug && ($level eq 'DEBUG')); if ($level =~ /(\d+)/) { print STDERR "\t" x $1; } print STDERR "$level: $str\n"; } # Normalize SQL queries by removing parameters sub normalize_query { my $orig_query = shift; return if (!$orig_query); # Remove comments $orig_query =~ s/\/\*(.*?)\*\///gs; $orig_query = lc($orig_query); # Remove extra space, new line and tab characters by a single space $orig_query =~ s/[\t\s\r\n]+/ /gs; # Remove string content $orig_query =~ s/\\'//g; $orig_query =~ s/'[^']*'/''/g; $orig_query =~ s/''('')+/''/g; # Remove NULL parameters $orig_query =~ s/=\s*NULL/=''/g; # Remove numbers $orig_query =~ s/([^a-z_\$-])-?([0-9]+)/${1}0/g; # Remove hexadecimal numbers $orig_query =~ s/([^a-z_\$-])0x[0-9a-f]{1,10}/${1}0x/g; # Remove IN values $orig_query =~ s/in\s*\([\'0x,\s]*\)/in (...)/g; return $orig_query; } # Format numbers with comma for better reading sub comma_numbers { return 0 if ($#_ < 0); my $text = reverse $_[0]; $text =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1$num_sep/g; return scalar reverse $text; } # Format duration sub convert_time { my $time = shift; return '0s' if (!$time); my $days = int($time / 86400000); $time -= ($days * 86400000); my $hours = int($time / 3600000); $time -= ($hours * 3600000); my $minutes = int($time / 60000); $time -= ($minutes * 60000); my $seconds = sprintf("%0.3f", $time / 1000); $days = $days < 1 ? '' : $days . 'd'; $hours = $hours < 1 ? '' : $hours . 'h'; $minutes = $minutes < 1 ? '' : $minutes . 'm'; $time = $days . $hours . $minutes . $seconds . 
's'; return $time; } # Stores the top N queries generating the biggest temporary file sub set_top_tempfile_info { my ($q, $sz, $date, $db, $user, $remote, $app) = @_; push(@top_tempfile_info, [($sz, $date, $q, $db, $user, $remote, $app)]); my @tmp_top_tempfile_info = sort {$b->[0] <=> $a->[0]} @top_tempfile_info; @top_tempfile_info = (); for (my $i = 0; $i <= $#tmp_top_tempfile_info; $i++) { push(@top_tempfile_info, $tmp_top_tempfile_info[$i]); last if ($i == $end_top); } } # Stores the top N queries waiting the most sub set_top_locked_info { my ($q, $dt, $date, $db, $user, $remote, $app) = @_; push(@top_locked_info, [($dt, $date, $q, $db, $user, $remote, $app)]); my @tmp_top_locked_info = sort {$b->[0] <=> $a->[0]} @top_locked_info; @top_locked_info = (); for (my $i = 0; $i <= $#tmp_top_locked_info; $i++) { push(@top_locked_info, $tmp_top_locked_info[$i]); last if ($i == $end_top); } } # Stores the top N slowest queries sub set_top_slowest { my ($q, $dt, $date, $db, $user, $remote, $app) = @_; push(@top_slowest, [($dt, $date, $q, $db, $user, $remote, $app)]); my @tmp_top_slowest = sort {$b->[0] <=> $a->[0]} @top_slowest; @top_slowest = (); for (my $i = 0; $i <= $#tmp_top_slowest; $i++) { push(@top_slowest, $tmp_top_slowest[$i]); last if ($i == $end_top); } } # Stores top N slowest sample queries sub set_top_sample { my ($norm, $q, $dt, $date, $db, $user, $remote, $app) = @_; $normalyzed_info{$norm}{samples}{$dt}{query} = $q; $normalyzed_info{$norm}{samples}{$dt}{date} = $date; $normalyzed_info{$norm}{samples}{$dt}{db} = $db; $normalyzed_info{$norm}{samples}{$dt}{user} = $user; $normalyzed_info{$norm}{samples}{$dt}{remote} = $remote; $normalyzed_info{$norm}{samples}{$dt}{app} = $app; my $i = 1; foreach my $k (sort {$b <=> $a} keys %{$normalyzed_info{$norm}{samples}}) { if ($i > $sample) { delete $normalyzed_info{$norm}{samples}{$k}; } $i++; } } # Stores top N error sample queries sub set_top_error_sample { my ($q, $date, $real_error, $detail, $context, $statement, $hint, $db) = @_; # Stop when we have our number of samples if (!exists $error_info{$q}{date} || ($#{$error_info{$q}{date}} < $sample)) { if (($q =~ /deadlock detected/) || !grep(/\Q$real_error\E/, @{$error_info{$q}{error}})) { push(@{$error_info{$q}{date}}, $date); push(@{$error_info{$q}{detail}}, $detail); push(@{$error_info{$q}{context}}, $context); push(@{$error_info{$q}{statement}}, $statement); push(@{$error_info{$q}{hint}}, $hint); push(@{$error_info{$q}{error}}, $real_error); push(@{$error_info{$q}{db}}, $db); } } } sub dump_as_text { # Global information my $curdate = localtime(time); my $fmt_nlines = &comma_numbers($nlines); my $total_time = timestr($td); $total_time =~ s/^([\.0-9]+) wallclock.*/$1/; $total_time = &convert_time($total_time * 1000); my $logfile_str = $log_files[0]; if ($#log_files > 0) { $logfile_str .= ', ..., ' . 
$log_files[-1]; } print $fh qq{ $report_title - Global information --------------------------------------------------- Generated on $curdate Log file: $logfile_str Parsed $fmt_nlines log entries in $total_time Log start from $overall_stat{'first_log_ts'} to $overall_stat{'last_log_ts'} }; # Overall statistics my $fmt_unique = &comma_numbers(scalar keys %normalyzed_info) || 0; my $fmt_queries = &comma_numbers($overall_stat{'queries_number'}) || 0; my $fmt_duration = &convert_time($overall_stat{'queries_duration'}) || 0; print $fh qq{ - Overall statistics --------------------------------------------------- Number of unique normalized queries: $fmt_unique Number of queries: $fmt_queries Total query duration: $fmt_duration First query: $overall_stat{'first_query_ts'} Last query: $overall_stat{'last_query_ts'} }; foreach (sort {$overall_stat{'query_peak'}{$b} <=> $overall_stat{'query_peak'}{$a}} keys %{$overall_stat{'query_peak'}}) { print $fh "Query peak: ", &comma_numbers($overall_stat{'query_peak'}{$_}), " queries/s at $_"; last; } if (!$disable_error) { my $fmt_errors = &comma_numbers($overall_stat{'errors_number'}) || 0; my $fmt_unique_error = &comma_numbers(scalar keys %{$overall_stat{'unique_normalized_errors'}}) || 0; print $fh qq{ Number of events: $fmt_errors Number of unique normalized events: $fmt_unique_error }; } if ($tempfile_info{count}) { my $fmt_temp_maxsise = &comma_numbers($tempfile_info{maxsize}) || 0; my $fmt_temp_avsize = &comma_numbers(sprintf("%.2f", ($tempfile_info{size} / $tempfile_info{count}))); print $fh qq{Number temporary files: $tempfile_info{count} Max size of temporary files: $fmt_temp_maxsise Average size of temporary files: $fmt_temp_avsize }; } if (!$disable_session && $session_info{count}) { my $avg_session_duration = &convert_time($session_info{duration} / $session_info{count}); my $tot_session_duration = &convert_time($session_info{duration}); print $fh qq{Total number of sessions: $session_info{count} Total duration of sessions: $tot_session_duration Average duration of sessions: $avg_session_duration }; } if (!$disable_connection && $connection_info{count}) { print $fh "Total number of connections: $connection_info{count}\n"; } if (scalar keys %database_info > 1) { print $fh "Total number of databases: ", scalar keys %database_info, "\n"; } if (!$disable_hourly && $overall_stat{'queries_number'}) { print $fh qq{ - Hourly statistics ---------------------------------------------------- Report not supported by text format }; } # INSERT/DELETE/UPDATE/SELECT repartition my $totala = $overall_stat{'SELECT'} + $overall_stat{'INSERT'} + $overall_stat{'UPDATE'} + $overall_stat{'DELETE'}; if (!$disable_type && $totala) { my $total = $overall_stat{'queries_number'} || 1; print $fh "\n- Queries by type ------------------------------------------------------\n\n"; print $fh "Type Count Percentage\n"; print $fh "SELECT: ", &comma_numbers($overall_stat{'SELECT'}) || 0, " ", sprintf("%0.2f", ($overall_stat{'SELECT'} * 100) / $total), "%\n"; print $fh "INSERT: ", &comma_numbers($overall_stat{'INSERT'}) || 0, " ", sprintf("%0.2f", ($overall_stat{'INSERT'} * 100) / $total), "%\n"; print $fh "UPDATE: ", &comma_numbers($overall_stat{'UPDATE'}) || 0, " ", sprintf("%0.2f", ($overall_stat{'UPDATE'} * 100) / $total), "%\n"; print $fh "DELETE: ", &comma_numbers($overall_stat{'DELETE'}) || 0, " ", sprintf("%0.2f", ($overall_stat{'DELETE'} * 100) / $total), "%\n"; print $fh "OTHERS: ", &comma_numbers($total - $totala) || 0, " ", sprintf("%0.2f", (($total - $totala) * 100) / 
$total), "%\n" if (($total - $totala) > 0); print $fh "\n"; # Show request per database statistics if (scalar keys %database_info > 1) { print $fh "\n- Request per database ------------------------------------------------------\n\n"; print $fh "Database Request type Count\n"; foreach my $d (sort keys %database_info) { print $fh "$d - ", &comma_numbers($database_info{$d}{count}), "\n"; foreach my $r (sort keys %{$database_info{$d}}) { next if ($r eq 'count'); print $fh "\t$r ", &comma_numbers($database_info{$d}{$r}), "\n"; } } } # Show request per application statistics if (scalar keys %application_info > 1) { print $fh "\n- Request per application ------------------------------------------------------\n\n"; print $fh "Application Request type Count\n"; foreach my $d (sort keys %application_info) { print $fh "$d - ", &comma_numbers($application_info{$d}{count}), "\n"; foreach my $r (sort keys %{$application_info{$d}}) { next if ($r eq 'count'); print $fh "\t$r ", &comma_numbers($application_info{$d}{$r}), "\n"; } } } } if (!$disable_lock && scalar keys %lock_info > 0) { print $fh "\n- Locks by type ------------------------------------------------------\n\n"; print $fh "Type Object Count Total Duration Avg duration (s)\n"; my $total_count = 0; my $total_duration = 0; foreach my $t (sort keys %lock_info) { print $fh "$t\t\t", &comma_numbers($lock_info{$t}{count}), " ", &convert_time($lock_info{$t}{duration}), " ", &convert_time($lock_info{$t}{duration} / $lock_info{$t}{count}), "\n"; foreach my $o (sort keys %{$lock_info{$t}}) { next if (($o eq 'count') || ($o eq 'duration') || ($o eq 'chronos')); print $fh "\t$o\t", &comma_numbers($lock_info{$t}{$o}{count}), " ", &convert_time($lock_info{$t}{$o}{duration}), " ", &convert_time($lock_info{$t}{$o}{duration} / $lock_info{$t}{$o}{count}), "\n"; } $total_count += $lock_info{$t}{count}; $total_duration += $lock_info{$t}{duration}; } print $fh "Total:\t\t\t", &comma_numbers($total_count), " ", &convert_time($total_duration), " ", &convert_time($total_duration / ($total_count || 1)), "\n"; } # Show session per database statistics if (!$disable_session && exists $session_info{database}) { print $fh "\n- Sessions per database ------------------------------------------------------\n\n"; print $fh "Database Count Total Duration Avg duration (s)\n"; foreach my $d (sort keys %{$session_info{database}}) { print $fh "$d - ", &comma_numbers($session_info{database}{$d}{count}), " ", &convert_time($session_info{database}{$d}{duration}), " ", &convert_time($session_info{database}{$d}{duration} / $session_info{database}{$d}{count}), "\n"; } } # Show session per user statistics if (!$disable_session && exists $session_info{user}) { print $fh "\n- Sessions per user ------------------------------------------------------\n\n"; print $fh "User Count Total Duration Avg duration (s)\n"; foreach my $d (sort keys %{$session_info{user}}) { print $fh "$d - ", &comma_numbers($session_info{user}{$d}{count}), " ", &convert_time($session_info{user}{$d}{duration}), " ", &convert_time($session_info{user}{$d}{duration} / $session_info{user}{$d}{count}), "\n"; } } # Show session per host statistics if (!$disable_session && exists $session_info{host}) { print $fh "\n- Sessions per host ------------------------------------------------------\n\n"; print $fh "User Count Total Duration Avg duration (s)\n"; foreach my $d (sort keys %{$session_info{host}}) { print $fh "$d - ", &comma_numbers($session_info{host}{$d}{count}), " ", &convert_time($session_info{host}{$d}{duration}), " ", 
&convert_time($session_info{host}{$d}{duration} / $session_info{host}{$d}{count}), "\n"; } } # Show connection per database statistics if (!$disable_connection && exists $connection_info{database}) { print $fh "\n- Connections per database ------------------------------------------------------\n\n"; print $fh "Database User Count\n"; foreach my $d (sort keys %{$connection_info{database}}) { print $fh "$d - ", &comma_numbers($connection_info{database}{$d}), "\n"; foreach my $u (sort keys %{$connection_info{user}}) { next if (!exists $connection_info{database_user}{$d}{$u}); print $fh "\t$u ", &comma_numbers($connection_info{database_user}{$d}{$u}), "\n"; } } } # Show connection per user statistics if (!$disable_connection && exists $connection_info{user}) { print $fh "\n- Connections per user ------------------------------------------------------\n\n"; print $fh "User Count\n"; foreach my $d (sort keys %{$connection_info{user}}) { print $fh "$d - ", &comma_numbers($connection_info{user}{$d}), "\n"; } } # Show connection per host statistics if (!$disable_connection && exists $connection_info{host}) { print $fh "\n- Connections per host ------------------------------------------------------\n\n"; print $fh "User Count\n"; foreach my $d (sort keys %{$connection_info{host}}) { print $fh "$d - ", &comma_numbers($connection_info{host}{$d}), "\n"; } } # Show lock wait detailed informations if (!$disable_lock && scalar keys %lock_info > 0) { my @top_locked_queries; foreach my $h (keys %normalyzed_info) { if (exists($normalyzed_info{$h}{locks})) { push (@top_locked_queries, [$h, $normalyzed_info{$h}{locks}{count}, $normalyzed_info{$h}{locks}{wait}, $normalyzed_info{$h}{locks}{minwait}, $normalyzed_info{$h}{locks}{maxwait}]); } } # Most frequent waiting queries (N) @top_locked_queries = sort {$b->[2] <=> $a->[2]} @top_locked_queries; print $fh "\n- Most frequent waiting queries (N) -----------------------------------------\n\n"; print $fh "Rank Count Total wait time (s) Min/Max/Avg duration (s) Query\n"; for (my $i = 0 ; $i <= $#top_locked_queries ; $i++) { last if ($i > $end_top); print $fh ($i + 1), ") ", $top_locked_queries[$i]->[1], " - ", &convert_time($top_locked_queries[$i]->[2]), " - ", &convert_time($top_locked_queries[$i]->[3]), "/", &convert_time($top_locked_queries[$i]->[4]), "/", &convert_time(($top_locked_queries[$i]->[4] / $top_locked_queries[$i]->[1])), " - ", $top_locked_queries[$i]->[0], "\n"; print $fh "--\n"; my $k = $top_locked_queries[$i]->[0]; my $j = 1; foreach my $d (sort {$b <=> $a} keys %{$normalyzed_info{$k}{samples}}) { my $ttl = $top_locked_info[$i]->[1] || ''; my $db = " - $normalyzed_info{$k}{samples}{$d}{date} - database: $normalyzed_info{$k}{samples}{$d}{db}" if ($normalyzed_info{$k}{samples}{$d}{db}); $db .= ", user: $normalyzed_info{$k}{samples}{$d}{user}" if ($normalyzed_info{$k}{samples}{$d}{user}); $db .= ", remote: $normalyzed_info{$k}{samples}{$d}{remote}" if ($normalyzed_info{$k}{samples}{$d}{remote}); $db .= ", app: $normalyzed_info{$k}{samples}{$d}{app}" if ($normalyzed_info{$k}{samples}{$d}{app}); $db =~ s/^, / - /; print $fh "\t- Example $j: ", &convert_time($d), "$db - ", $normalyzed_info{$k}{samples}{$d}{query}, "\n"; $j++; } } print $fh "\n"; @top_locked_queries = (); # Queries that waited the most @top_locked_info = sort {$b->[1] <=> $a->[1]} @top_locked_info; print $fh "\n- Queries that waited the mosts ---------------------------------------------\n\n"; print $fh "Rank Wait time (s) Query\n"; for (my $i = 0 ; $i <= $#top_locked_info ; $i++) { my 
$ttl = $top_locked_info[$i]->[1] || ''; my $db = " - database: $top_locked_info[$i]->[3]" if ($top_locked_info[$i]->[3]); $db .= ", user: $top_locked_info[$i]->[4]" if ($top_locked_info[$i]->[4]); $db .= ", remote: $top_locked_info[$i]->[5]" if ($top_locked_info[$i]->[5]); $db .= ", app: $top_locked_info[$i]->[6]" if ($top_locked_info[$i]->[6]); $db =~ s/^, / - /; print $fh ($i + 1), ") ", &convert_time($top_locked_info[$i]->[0]), " $ttl$db - ", $top_locked_info[$i]->[2], "\n"; print $fh "--\n"; } print $fh "\n"; } # Show temporary files detailed informations if (!$disable_temporary && scalar keys %tempfile_info > 0) { my @top_temporary; foreach my $h (keys %normalyzed_info) { if (exists($normalyzed_info{$h}{tempfiles})) { push (@top_temporary, [$h, $normalyzed_info{$h}{tempfiles}{count}, $normalyzed_info{$h}{tempfiles}{size}, $normalyzed_info{$h}{tempfiles}{minsize}, $normalyzed_info{$h}{tempfiles}{maxsize}]); } } # Queries generating the most temporary files (N) @top_temporary = sort {$b->[1] <=> $a->[1]} @top_temporary; print $fh "\n- Queries generating the most temporary files (N) ---------------------------\n\n"; print $fh "Rank Count Total size Min/Max/Avg size Query\n"; my $idx = 1; for (my $i = 0 ; $i <= $#top_temporary ; $i++) { last if ($i > $end_top); print $fh $idx, ") ", $top_temporary[$i]->[1], " - ", &comma_numbers($top_temporary[$i]->[2]), " - ", &comma_numbers($top_temporary[$i]->[3]), "/", &comma_numbers($top_temporary[$i]->[4]), "/", &comma_numbers(sprintf("%.2f", $top_temporary[$i]->[2] / $top_temporary[$i]->[1])), " - ", $top_temporary[$i]->[0], "\n"; print $fh "--\n"; my $k = $top_temporary[$i]->[0]; if ($normalyzed_info{$k}{count} > 1) { my $j = 1; foreach my $d (sort {$b <=> $a} keys %{$normalyzed_info{$k}{samples}}) { my $db = "$normalyzed_info{$k}{samples}{$d}{date} - database: $normalyzed_info{$k}{samples}{$d}{db}" if ($normalyzed_info{$k}{samples}{$d}{db}); $db .= ", user: $normalyzed_info{$k}{samples}{$d}{user}" if ($normalyzed_info{$k}{samples}{$d}{user}); $db .= ", remote: $normalyzed_info{$k}{samples}{$d}{remote}" if ($normalyzed_info{$k}{samples}{$d}{remote}); $db .= ", app: $normalyzed_info{$k}{samples}{$d}{app}" if ($normalyzed_info{$k}{samples}{$d}{app}); $db =~ s/^, / - /; print $fh "\t- Example $j: ", &convert_time($d), " - $db - ", $normalyzed_info{$k}{samples}{$d}{query}, "\n"; $j++; } } $idx++; } @top_temporary = (); # Top queries generating the largest temporary files @top_tempfile_info = sort {$b->[1] <=> $a->[1]} @top_tempfile_info; print $fh "\n- Queries generating the largest temporary files ----------------------------\n\n"; print $fh "Rank Size Query\n"; for (my $i = 0 ; $i <= $#top_tempfile_info ; $i++) { my $ttl = $top_tempfile_info[$i]->[1] || ''; my $db = " - database: $top_tempfile_info[$i]->[3]" if ($top_tempfile_info[$i]->[3]); $db .= ", user: $top_tempfile_info[$i]->[4]" if ($top_tempfile_info[$i]->[4]); $db .= ", remote: $top_tempfile_info[$i]->[5]" if ($top_tempfile_info[$i]->[5]); $db .= ", app: $top_tempfile_info[$i]->[6]" if ($top_tempfile_info[$i]->[6]); $db =~ s/^, / - /; print $fh ($i + 1), ") ", &comma_numbers($top_tempfile_info[$i]->[0]), " - $ttl$db - ", $top_tempfile_info[$i]->[2], "\n"; } print $fh "\n"; } # Show top information if (!$disable_query && ($#top_slowest >= 0)) { print $fh "\n- Slowest queries ------------------------------------------------------\n\n"; print $fh "Rank Duration (s) Query\n"; for (my $i = 0 ; $i <= $#top_slowest ; $i++) { my $db = " database: $top_slowest[$i]->[3]" if ($top_slowest[$i]->[3]); 
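# Build the optional location suffix for this entry; user, remote host and
# application name are appended only when they were captured from the log.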
$db .= ", user: $top_slowest[$i]->[4]" if ($top_slowest[$i]->[4]); $db .= ", remote: $top_slowest[$i]->[5]" if ($top_slowest[$i]->[5]); $db .= ", app: $top_slowest[$i]->[6]" if ($top_slowest[$i]->[6]); $db =~ s/^, //; print $fh $i + 1, ") " . &convert_time($top_slowest[$i]->[0]) . "$db - $top_slowest[$i]->[2]\n"; print $fh "--\n"; } print $fh "\n- Queries that took up the most time (N) -------------------------------\n\n"; print $fh "Rank Total duration Times executed Min/Max/Avg duration (s) Query\n"; my $idx = 1; foreach my $k (sort {$normalyzed_info{$b}{duration} <=> $normalyzed_info{$a}{duration}} keys %normalyzed_info) { next if (!$normalyzed_info{$k}{count}); last if ($idx > $top); my $q = $k; if ($normalyzed_info{$k}{count} == 1) { foreach (keys %{$normalyzed_info{$k}{samples}}) { $q = $normalyzed_info{$k}{samples}{$_}{query}; last; } } $normalyzed_info{$k}{average} = $normalyzed_info{$k}{duration} / $normalyzed_info{$k}{count}; print $fh "$idx) " . &convert_time($normalyzed_info{$k}{duration}) . " - " . &comma_numbers($normalyzed_info{$k}{count}) . " - " . &convert_time($normalyzed_info{$k}{min}) . "/" . &convert_time($normalyzed_info{$k}{max}) . "/" . &convert_time($normalyzed_info{$k}{average}) . " - $q\n"; print $fh "--\n"; my $i = 1; foreach my $d (sort {$b <=> $a} keys %{$normalyzed_info{$k}{samples}}) { my $db = " - database: $normalyzed_info{$k}{samples}{$d}{db}" if ($normalyzed_info{$k}{samples}{$d}{db}); $db .= ", user: $normalyzed_info{$k}{samples}{$d}{user}" if ($normalyzed_info{$k}{samples}{$d}{user}); $db .= ", remote: $normalyzed_info{$k}{samples}{$d}{remote}" if ($normalyzed_info{$k}{samples}{$d}{remote}); $db .= ", app: $normalyzed_info{$k}{samples}{$d}{app}" if ($normalyzed_info{$k}{samples}{$d}{app}); $db =~ s/^, / - /; print $fh "\t- Example $i: ", &convert_time($d), "$db - ", $normalyzed_info{$k}{samples}{$d}{query}, "\n"; $i++; } $idx++; } } if (!$disable_query && (scalar keys %normalyzed_info > 0)) { print $fh "\n- Most frequent queries (N) --------------------------------------------\n\n"; print $fh "Rank Times executed Total duration Min/Max/Avg duration (s) Query\n"; my $idx = 1; foreach my $k (sort {$normalyzed_info{$b}{count} <=> $normalyzed_info{$a}{count}} keys %normalyzed_info) { next if (!$normalyzed_info{$k}{count}); last if ($idx > $top); my $q = $k; if ($normalyzed_info{$k}{count} == 1) { foreach (keys %{$normalyzed_info{$k}{samples}}) { $q = $normalyzed_info{$k}{samples}{$_}{query}; last; } } print $fh "$idx) " . &comma_numbers($normalyzed_info{$k}{count}) . " - " . &convert_time($normalyzed_info{$k}{duration}) . " - " . &convert_time($normalyzed_info{$k}{min}) . "/" . &convert_time($normalyzed_info{$k}{max}) . "/" . &convert_time($normalyzed_info{$k}{duration} / $normalyzed_info{$k}{count}) . 
" - $q\n"; print $fh "--\n"; my $i = 1; foreach my $d (sort {$b <=> $a} keys %{$normalyzed_info{$k}{samples}}) { my $db = " - database: $normalyzed_info{$k}{samples}{$d}{db}" if ($normalyzed_info{$k}{samples}{$d}{db}); $db .= ", user: $normalyzed_info{$k}{samples}{$d}{user}" if ($normalyzed_info{$k}{samples}{$d}{user}); $db .= ", remote: $normalyzed_info{$k}{samples}{$d}{remote}" if ($normalyzed_info{$k}{samples}{$d}{remote}); $db .= ", app: $normalyzed_info{$k}{samples}{$d}{app}" if ($normalyzed_info{$k}{samples}{$d}{app}); $db =~ s/^, / - /; print $fh "\tExample $i: ", &convert_time($d), "$db - ", $normalyzed_info{$k}{samples}{$d}{query}, "\n"; $i++; } $idx++; } } if (!$disable_query && ($#top_slowest >= 0)) { print $fh "\n- Slowest queries (N) --------------------------------------------------\n\n"; print $fh "Rank Min/Max/Avg duration (s) Times executed Total duration Query\n"; my $idx = 1; foreach my $k (sort {$normalyzed_info{$b}{average} <=> $normalyzed_info{$a}{average}} keys %normalyzed_info) { next if (!$normalyzed_info{$k}{count}); last if ($idx > $top); my $q = $k; if ($normalyzed_info{$k}{count} == 1) { foreach (keys %{$normalyzed_info{$k}{samples}}) { $q = $normalyzed_info{$k}{samples}{$_}{query}; last; } } print $fh "$idx) " . &convert_time($normalyzed_info{$k}{min}) . "/" . &convert_time($normalyzed_info{$k}{max}) . "/" . &convert_time($normalyzed_info{$k}{average}) . " - " . &comma_numbers($normalyzed_info{$k}{count}) . " - " . &convert_time($normalyzed_info{$k}{duration}) . " - $q\n"; print $fh "--\n"; my $i = 1; foreach my $d (sort {$b <=> $a} keys %{$normalyzed_info{$k}{samples}}) { my $db = " - database: $normalyzed_info{$k}{samples}{$d}{db}" if ($normalyzed_info{$k}{samples}{$d}{db}); $db .= ", user: $normalyzed_info{$k}{samples}{$d}{user}" if ($normalyzed_info{$k}{samples}{$d}{user}); $db .= ", remote: $normalyzed_info{$k}{samples}{$d}{remote}" if ($normalyzed_info{$k}{samples}{$d}{remote}); $db .= ", app: $normalyzed_info{$k}{samples}{$d}{app}" if ($normalyzed_info{$k}{samples}{$d}{app}); $db =~ s/^, / - /; print $fh "\tExample $i: ", &convert_time($d), "$db - ", $normalyzed_info{$k}{samples}{$d}{query}, "\n"; $i++; } $idx++; } } @top_slowest = (); if (!$disable_error) { &show_error_as_text(); } print $fh "\n\n"; print $fh "Report generated by pgBadger $VERSION ($project_url).\n"; } sub dump_error_as_text { # Global information my $curdate = localtime(time); my $fmt_nlines = &comma_numbers($nlines); my $total_time = timestr($td); $total_time =~ s/^([\.0-9]+) wallclock.*/$1/; $total_time = &convert_time($total_time * 1000); my $logfile_str = $log_files[0]; if ($#log_files > 0) { $logfile_str .= ', ..., ' . 
$log_files[-1]; } print $fh qq{ $report_title - Global information --------------------------------------------------- Generated on $curdate Log file: $logfile_str Parsed $fmt_nlines log entries in $total_time Log start from $overall_stat{'first_log_ts'} to $overall_stat{'last_log_ts'} }; &show_error_as_text(); print $fh "\n\n"; print $fh "Report generated by pgBadger $VERSION ($project_url).\n"; } sub show_error_as_text { return if (scalar keys %error_info == 0); print $fh "\n- Most frequent events (N) ---------------------------------------------\n\n"; my $idx = 1; foreach my $k (sort {$error_info{$b}{count} <=> $error_info{$a}{count}} keys %error_info) { next if (!$error_info{$k}{count}); last if ($idx > $top); if ($error_info{$k}{count} > 1) { my $msg = $k; $msg =~ s/HINT: (parameter "[^"]+" changed to)/LOG: $1/; $msg =~ s/HINT: (database system was shut down)/LOG: $1/; print $fh "$idx) " . &comma_numbers($error_info{$k}{count}) . " - $msg\n"; print $fh "--\n"; my $j = 1; for (my $i = 0 ; $i <= $#{$error_info{$k}{date}} ; $i++) { if ( ($error_info{$k}{error}[$i] =~ s/HINT: (parameter "[^"]+" changed to)/LOG: $1/) || ($error_info{$k}{error}[$i] =~ s/HINT: (database system was shut down)/LOG: $1/)) { $logs_type{HINT}--; $logs_type{LOG}++; } print $fh "\t- Example $j: $error_info{$k}{date}[$i] - $error_info{$k}{error}[$i]\n"; print $fh "\t\tDetail: $error_info{$k}{detail}[$i]\n" if ($error_info{$k}{detail}[$i]); print $fh "\t\tContext: $error_info{$k}{context}[$i]\n" if ($error_info{$k}{context}[$i]); print $fh "\t\tHint: $error_info{$k}{hint}[$i]\n" if ($error_info{$k}{hint}[$i]); print $fh "\t\tStatement: $error_info{$k}{statement}[$i]\n" if ($error_info{$k}{statement}[$i]); print $fh "\t\tDatabase: $error_info{$k}{db}[$i]\n" if ($error_info{$k}{db}[$i]); $j++; } } else { if ( ($error_info{$k}{error}[0] =~ s/HINT: (parameter "[^"]+" changed to)/LOG: $1/) || ($error_info{$k}{error}[0] =~ s/HINT: (database system was shut down)/LOG: $1/)) { $logs_type{HINT}--; $logs_type{LOG}++; } print $fh "$idx) " . &comma_numbers($error_info{$k}{count}) . " - $error_info{$k}{error}[0]\n"; print $fh "--\n"; print $fh "\t- Date: $error_info{$k}{date}[0]\n"; print $fh "\t\tDetail: $error_info{$k}{detail}[0]\n" if ($error_info{$k}{detail}[0]); print $fh "\t\tContext: $error_info{$k}{context}[0]\n" if ($error_info{$k}{context}[0]); print $fh "\t\tHint: $error_info{$k}{hint}[0]\n" if ($error_info{$k}{hint}[0]); print $fh "\t\tStatement: $error_info{$k}{statement}[0]\n" if ($error_info{$k}{statement}[0]); print $fh "\t\tDatabase: $error_info{$k}{db}[0]\n" if ($error_info{$k}{db}[0]); } $idx++; } if (scalar keys %logs_type > 0) { print $fh "\n- Logs per type ---------------------------------------------\n\n"; my $total_logs = 0; foreach my $d (keys %logs_type) { $total_logs += $logs_type{$d}; } print $fh "Logs type Count Percentage\n"; foreach my $d (sort keys %logs_type) { next if (!$logs_type{$d}); print $fh "$d\t\t", &comma_numbers($logs_type{$d}), "\t", sprintf("%0.2f", ($logs_type{$d} * 100) / $total_logs), "%\n"; } } } sub get_page_style { return qq{ }; } sub html_header { my $date = localtime(time); my $style = &get_page_style(); print $fh qq{ $report_title $style }; if (!$nograph) { my @jscode = ; print $fh <

$report_title

}; print $fh qq{ \n"; print $fh "

Normalized reports are marked with a \"(N)\".

\n"; } sub html_footer { print $fh qq{

 

}; print $fh qq{
Table of contents
}; print $fh qq{ }; } sub dump_as_html { # Dump the html header &html_header(); # Global information my $curdate = localtime(time); my $fmt_nlines = &comma_numbers($nlines); my $total_time = timestr($td); $total_time =~ s/^([\.0-9]+) wallclock.*/$1/; $total_time = &convert_time($total_time * 1000); my $logfile_str = $log_files[0]; if ($#log_files > 0) { $logfile_str .= ', ..., ' . $log_files[-1]; } print $fh qq{
  • Generated on $curdate
  • Log file: $logfile_str
  • Parsed $fmt_nlines log entries in $total_time
  • Log start from $overall_stat{'first_log_ts'} to $overall_stat{'last_log_ts'}
}; # Overall statistics my $fmt_unique = &comma_numbers(scalar keys %normalyzed_info) || 0; my $fmt_queries = &comma_numbers($overall_stat{'queries_number'}) || 0; my $fmt_duration = &convert_time($overall_stat{'queries_duration'}) || 0; print $fh qq{

Overall statistics ^

  • Number of unique normalized queries: $fmt_unique
  • Number of queries: $fmt_queries
  • Total query duration: $fmt_duration
  • First query: $overall_stat{'first_query_ts'}
  • Last query: $overall_stat{'last_query_ts'}
  • }; foreach (sort {$overall_stat{'query_peak'}{$b} <=> $overall_stat{'query_peak'}{$a}} keys %{$overall_stat{'query_peak'}}) { print $fh "
  • Query peak: ", &comma_numbers($overall_stat{'query_peak'}{$_}), " queries/s at $_
  • "; last; } if (!$disable_error) { my $fmt_errors = &comma_numbers($overall_stat{'errors_number'}) || 0; my $fmt_unique_error = &comma_numbers(scalar keys %{$overall_stat{'unique_normalized_errors'}}) || 0; print $fh qq{
  • Number of events: $fmt_errors
  • Number of unique normalized events: $fmt_unique_error
  • }; } if ($autovacuum_info{count}) { print $fh qq{
  • Total number of automatic vacuums: $autovacuum_info{count}
  • }; } if ($autoanalyze_info{count}) { print $fh qq{
  • Total number of automatic analyzes: $autoanalyze_info{count}
  • }; } print $fh qq{
    }; if ($tempfile_info{count}) { my $fmt_temp_maxsise = &comma_numbers($tempfile_info{maxsize}) || 0; my $fmt_temp_avsize = &comma_numbers(sprintf("%.2f", $tempfile_info{size} / $tempfile_info{count})); print $fh qq{
  • Number of temporary files: $tempfile_info{count}
  • Max size of temporary files: $fmt_temp_maxsise
  • Average size of temporary files: $fmt_temp_avsize
  • }; } if (!$disable_session && $session_info{count}) { my $avg_session_duration = &convert_time($session_info{duration} / $session_info{count}); my $tot_session_duration = &convert_time($session_info{duration}); print $fh qq{
  • Total number of sessions: $session_info{count}
  • Total duration of sessions: $tot_session_duration
  • Average duration of sessions: $avg_session_duration
  • }; } if (!$disable_connection && $connection_info{count}) { print $fh qq{
  • Total number of connections: $connection_info{count}
  • }; } if (scalar keys %database_info > 1) { my $db_count = scalar keys %database_info; print $fh qq{
  • Total number of databases: $db_count
  • }; } print $fh qq{
}; # Declare variables used to draw graphs my @labels = (); my @data1 = (); my @data2 = (); my @data3 = (); my $d1 = ''; my $d2 = ''; my $d3 = ''; my @avgs = (); for (my $i = 0 ; $i < 59 ; $i += $avg_minutes) { push(@avgs, sprintf("%02d", $i)); } push(@avgs, 59); # Set graphs limits $overall_stat{'first_log_ts'} =~ /(\d+)-(\d+)-(\d+) (\d+):(\d+):(\d+)/; $t_min = timegm_nocheck(0, $5, $4, $3, $2 - 1, $1) * 1000; $t_min -= ($avg_minutes * 60000); $t_min_hour = timegm_nocheck(0, 0, $4, $3, $2 - 1, $1) * 1000; $overall_stat{'last_log_ts'} =~ /(\d+)-(\d+)-(\d+) (\d+):(\d+):(\d+)/; $t_max = timegm_nocheck(59, $5, $4, $3, $2 - 1, $1) * 1000; $t_max += ($avg_minutes * 60000); $t_max_hour = timegm_nocheck(0, 0, $4, $3, $2 - 1, $1) * 1000; # Start creating hourly reports if (!$disable_hourly && ($overall_stat{'queries_number'} || exists $connection_info{chronos})) { print $fh qq{

Hourly statistics ^

}; } if (!$disable_hourly && $overall_stat{'queries_number'}) { print $fh qq{ }; if (exists $connection_info{chronos}) { print $fh " \n"; } if (exists $session_info{chronos}) { print $fh " \n"; } print $fh qq{ }; if (exists $connection_info{chronos}) { print $fh " \n"; } if (exists $session_info{chronos}) { print $fh " \n"; } print $fh qq{ }; foreach my $d (sort {$a <=> $b} keys %per_hour_info) { my $c = 1; $d =~ /^\d{4}(\d{2})(\d{2})$/; my $zday = "$abbr_month{$1} $2"; foreach my $h (sort {$a <=> $b} keys %{$per_hour_info{$d}}) { my $colb = $c % 2; $zday = " " if ($c > 1); $per_hour_info{$d}{$h}{average} = $per_hour_info{$d}{$h}{duration} / ($per_hour_info{$d}{$h}{count} || 1); $per_hour_info{$d}{$h}{'SELECT'}{average} = $per_hour_info{$d}{$h}{'SELECT'}{duration} / ($per_hour_info{$d}{$h}{'SELECT'}{count} || 1); my $write_average = ( ( $per_hour_info{$d}{$h}{'INSERT'}{duration} + $per_hour_info{$d}{$h}{'UPDATE'}{duration} + $per_hour_info{$d}{$h}{'DELETE'}{duration} ) || 0 ) / ( ( $per_hour_info{$d}{$h}{'INSERT'}{count} + $per_hour_info{$d}{$h}{'UPDATE'}{count} + $per_hour_info{$d}{$h}{'DELETE'}{count} ) || 1 ); print $fh ""; if (exists $connection_info{chronos}) { print $fh ""; } if (exists $session_info{chronos}) { $per_hour_info{$d}{$h}{'session'}{average} = $session_info{chronos}{"$d"}{"$h"}{duration} / ($session_info{chronos}{"$d"}{"$h"}{count} || 1); print $fh ""; } print $fh "\n"; $c++; } } print $fh "
Day Hour Queries SELECT queries Write queries Connections Sessions
Count Min/Max/Avg duration  Count Avg duration  INSERT UPDATE DELETE Avg duration Count Avg/s Count Avg duration 
$zday$h", &comma_numbers($per_hour_info{$d}{$h}{count}), "", &convert_time($per_hour_info{$d}{$h}{min}),"/",&convert_time($per_hour_info{$d}{$h}{max}),"/",&convert_time($per_hour_info{$d}{$h}{average}), "", &comma_numbers($per_hour_info{$d}{$h}{'SELECT'}{count} || 0), "", &convert_time($per_hour_info{$d}{$h}{'SELECT'}{average} || 0), "", &comma_numbers($per_hour_info{$d}{$h}{'INSERT'}{count} || 0), "", &comma_numbers($per_hour_info{$d}{$h}{'UPDATE'}{count} || 0), "", &comma_numbers($per_hour_info{$d}{$h}{'DELETE'}{count} || 0), "", &convert_time($write_average), "", &comma_numbers($connection_info{chronos}{"$d"}{"$h"}{count} || 0), "", &comma_numbers(sprintf("%0.2f", $connection_info{chronos}{"$d"}{"$h"}{count} / 3600)), "/s", &comma_numbers($session_info{chronos}{"$d"}{"$h"}{count} || 0), "", &convert_time($per_hour_info{$d}{$h}{'session'}{average}), "
\n"; if ($graph) { foreach my $tm (sort {$a <=> $b} keys %{$per_minute_info{query}}) { $tm =~ /(\d{4})(\d{2})(\d{2})/; my $y = $1 - 1900; my $mo = $2 - 1; my $d = $3; foreach my $h ("00" .. "23") { my %dataavg = (); foreach my $m ("00" .. "59") { my $rd = &average_per_minutes($m, $avg_minutes); if (exists $per_minute_info{query}{$tm}{$h}{$m}) { # Average per minute $dataavg{average}{"$rd"} += $per_minute_info{query}{$tm}{$h}{$m}{count}; # Search minimum and maximum during this minute foreach my $s (keys %{$per_minute_info{query}{$tm}{$h}{$m}{second}}) { $dataavg{max}{"$rd"} = $per_minute_info{query}{$tm}{$h}{$m}{second}{$s} if ($per_minute_info{query}{$tm}{$h}{$m}{second}{$s} > $dataavg{max}{"$rd"}); $dataavg{min}{"$rd"} = $per_minute_info{query}{$tm}{$h}{$m}{second}{$s} if (not exists $dataavg{min}{"$rd"} || ($per_minute_info{query}{$tm}{$h}{$m}{second}{$s} < $dataavg{min}{"$rd"})); } } } foreach my $rd (@avgs) { my $t = timegm_nocheck(0, $rd, $h, $d, $mo, $y) * 1000; next if ($t < $t_min); last if ($t > $t_max); # Average per minute $d2 .= "[$t, " . int(($dataavg{average}{"$rd"} || 0) / (60 * $avg_minutes)) . "],"; # Maxi per minute $d1 .= "[$t, " . ($dataavg{max}{"$rd"} || 0) . "],"; # Mini per minute $d3 .= "[$t, " . ($dataavg{min}{"$rd"} || 0) . "],"; } } } delete $per_minute_info{query}; $d1 =~ s/,$//; $d2 =~ s/,$//; $d3 =~ s/,$//; &flotr2_graph( 1, 'queriespersecond_graph', $d1, $d2, $d3, 'Queries per second (' . $avg_minutes . ' minutes average)', 'Queries per second', 'Maximum', 'Average', 'Minimum' ); $d1 = ''; $d2 = ''; $d3 = ''; } } if (!$disable_hourly && $connection_info{'count'}) { if ($graph) { if (exists $per_minute_info{connection}) { foreach my $tm (sort {$a <=> $b} keys %{$per_minute_info{connection}}) { $tm =~ /(\d{4})(\d{2})(\d{2})/; my $y = $1 - 1900; my $mo = $2 - 1; my $d = $3; foreach my $h ("00" .. "23") { my %dataavg = (); foreach my $m ("00" .. "59") { my $rd = &average_per_minutes($m, $avg_minutes); if (exists $per_minute_info{connection}{$tm}{$h}{$m}) { # Average per minute $dataavg{average}{"$rd"} += $per_minute_info{connection}{$tm}{$h}{$m}{count}; # Search minimum and maximum during this minute foreach my $s (keys %{$per_minute_info{connection}{$tm}{$h}{$m}{second}}) { $dataavg{max}{"$rd"} = $per_minute_info{connection}{$tm}{$h}{$m}{second}{$s} if ($per_minute_info{connection}{$tm}{$h}{$m}{second}{$s} > $dataavg{max}{"$rd"}); $dataavg{min}{"$rd"} = $per_minute_info{connection}{$tm}{$h}{$m}{second}{$s} if (not exists $dataavg{min}{"$rd"} || ($per_minute_info{connection}{$tm}{$h}{$m}{second}{$s} < $dataavg{min}{"$rd"})); } } } foreach my $rd (@avgs) { my $t = timegm_nocheck(0, $rd, $h, $d, $mo, $y) * 1000; next if ($t < $t_min); last if ($t > $t_max); # Average per minute $d2 .= "[$t, " . int(($dataavg{average}{"$rd"} || 0) / (60 * $avg_minutes)) . "],"; # Maxi per minute $d1 .= "[$t, " . ($dataavg{max}{"$rd"} || 0) . "],"; # Mini per minute $d3 .= "[$t, " . ($dataavg{min}{"$rd"} || 0) . "],"; } } } delete $per_minute_info{connection}; $d1 =~ s/,$//; $d2 =~ s/,$//; $d3 =~ s/,$//; &flotr2_graph( 2, 'connectionspersecond_graph', $d1, $d2, $d3, 'Connections per second (' . $avg_minutes . ' minutes average)', 'Connections per second', 'Maximum', 'Average', 'Minimum' ); $d1 = ''; $d2 = ''; $d3 = ''; } } } if (!$disable_hourly && $overall_stat{'queries_number'}) { if ($graph) { # All queries foreach my $tm (sort {$a <=> $b} keys %per_hour_info) { $tm =~ /(\d{4})(\d{2})(\d{2})/; my $y = $1 - 1900; my $mo = $2 - 1; my $d = $3; foreach my $h ("00" .. 
"23") { my $t = timegm_nocheck(0, 0, $h, $d, $mo, $y) * 1000; next if ($t < $t_min_hour); last if ($t > $t_max_hour); $d1 .= "[$t, " . ($per_hour_info{$tm}{$h}{count} || 0) . "],"; $d2 .= "[$t, " . sprintf("%.2f", (($per_hour_info{$tm}{$h}{duration} || 0) / ($per_hour_info{$tm}{$h}{count} || 1)) / 1000) . "],"; } } $d1 =~ s/,$//; $d2 =~ s/,$//; &flotr2_graph( 3, 'allqueries_graph', $d1, '', '', 'All queries', 'Queries', 'Number of queries', '', '', 'Duration', $d2, 'Average duration (s)' ); $d1 = ''; $d2 = ''; if (!$disable_query) { # Select queries foreach my $tm (sort {$a <=> $b} keys %per_hour_info) { $tm =~ /(\d{4})(\d{2})(\d{2})/; my $y = $1 - 1900; my $mo = $2 - 1; my $d = $3; foreach my $h ("00" .. "23") { my $t = timegm_nocheck(0, 0, $h, $d, $mo, $y) * 1000; next if ($t < $t_min_hour); last if ($t > $t_max_hour); $d1 .= "[$t, " . ($per_hour_info{$tm}{$h}{'SELECT'}{count} || 0) . "],"; $d2 .= "[$t, " . sprintf( "%.2f", (($per_hour_info{$tm}{$h}{'SELECT'}{duration} || 0) / ($per_hour_info{$tm}{$h}{'SELECT'}{count} || 1)) / 1000 ) . "],"; } } $d1 =~ s/,$//; $d2 =~ s/,$//; &flotr2_graph( 4, 'selectqueries_graph', $d1, '', '', 'SELECT queries', 'Queries', 'Number of queries', '', '', 'Duration', $d2, 'Average duration (s)' ); $d1 = ''; $d2 = ''; # Write queries if (!$select_only) { my $d4 = ''; foreach my $tm (sort {$a <=> $b} keys %per_hour_info) { $tm =~ /(\d{4})(\d{2})(\d{2})/; my $y = $1 - 1900; my $mo = $2 - 1; my $d = $3; foreach my $h ("00" .. "23") { my $t = timegm_nocheck(0, 0, $h, $d, $mo, $y) * 1000; next if ($t < $t_min_hour); last if ($t > $t_max_hour); my $wcount = $per_hour_info{$tm}{$h}{'UPDATE'}{count} + $per_hour_info{$tm}{$h}{'DELETE'}{count} + $per_hour_info{$tm}{$h}{'INSERT'}{count}; my $wduration = $per_hour_info{$tm}{$h}{'UPDATE'}{duration} + $per_hour_info{$tm}{$h}{'DELETE'}{duration} + $per_hour_info{$tm}{$h}{'INSERT'}{duration}; $d1 .= "[$t, " . ($per_hour_info{$tm}{$h}{'DELETE'}{count} || 0) . "],"; $d2 .= "[$t, " . ($per_hour_info{$tm}{$h}{'INSERT'}{count} || 0) . "],"; $d3 .= "[$t, " . ($per_hour_info{$tm}{$h}{'UPDATE'}{count} || 0) . "],"; $d4 .= "[$t, " . sprintf("%.2f", (($wduration || 0) / ($wcount || 1)) / 1000) . 
"],"; } } $d1 =~ s/,$//; $d2 =~ s/,$//; $d3 =~ s/,$//; $d4 =~ s/,$//; &flotr2_graph( 5, 'writequeries_graph', $d1, $d2, $d3, 'Write queries', 'Queries', 'DELETE queries', 'INSERT queries', 'UPDATE queries', 'Duration', $d4, 'Average duration (s)' ); $d1 = ''; $d2 = ''; $d3 = ''; $d4 = ''; } } } } if (!$disable_hourly && (scalar keys %per_hour_info > 0)) { if ($tempfile_info{count} || exists $checkpoint_info{chronos} || exists $restartpoint_info{chronos} || exists $autovacuum_info{chronos} ) { print $fh qq{}; } if ($tempfile_info{count}) { print $fh qq{}; } if ($checkpoint_info{wbuffer}) { if (exists $checkpoint_info{chronos}) { print $fh qq{}; } } if (exists $checkpoint_info{warning}) { print $fh qq{}; } if ($restartpoint_info{wbuffer}) { if (exists $restartpoint_info{chronos}) { print $fh qq{}; } } if (exists $autovacuum_info{chronos}) { print $fh " \n"; } if ($tempfile_info{count} || exists $checkpoint_info{chronos} || exists $restartpoint_info{chronos}) { print $fh qq{}; } if ($tempfile_info{count}) { print $fh qq{}; } if ($checkpoint_info{wbuffer}) { print $fh qq{}; } if (exists $checkpoint_info{warning}) { print $fh qq{}; } if ($restartpoint_info{wbuffer}) { print $fh qq{}; } if (exists $autovacuum_info{chronos}) { print $fh " \n"; } if ($tempfile_info{count} || exists $checkpoint_info{chronos} || exists $restartpoint_info{chronos}) { print $fh qq{}; foreach my $d (sort {$a <=> $b} keys %per_hour_info) { my $c = 1; $d =~ /^\d{4}(\d{2})(\d{2})$/; my $zday = "$abbr_month{$1} $2"; foreach my $h (sort {$a <=> $b} keys %{$per_hour_info{$d}}) { my $colb = $c % 2; $zday = " " if ($c > 1); print $fh ""; if ($tempfile_info{count}) { my $temp_average = '0'; if ($tempfile_info{chronos}{$d}{$h}{count}) { $temp_average = &comma_numbers( sprintf("%.2f", $tempfile_info{chronos}{$d}{$h}{size} / $tempfile_info{chronos}{$d}{$h}{count})); } print $fh ""; } if (exists $checkpoint_info{chronos} && $checkpoint_info{wbuffer}) { if (exists $checkpoint_info{chronos}{$d}{$h}) { print $fh ""; } else { print $fh ""; } } if (exists $checkpoint_info{chronos} && $checkpoint_info{warning}) { if (exists $checkpoint_info{chronos}{$d}{$h}{warning}) { print $fh ""; } else { print $fh ""; } } if (exists $restartpoint_info{chronos} && $restartpoint_info{wbuffer}) { if (exists $restartpoint_info{chronos}{$d}{$h}) { print $fh ""; } else { print $fh ""; } } if (exists $autovacuum_info{chronos}) { print $fh "", ""; } print $fh "\n"; $c++; } } print $fh "
Day Hour Temporary files Checkpoints Checkpoint warning Restartpoints Autovacuum
Count Avg size Written buffers Added Removed Recycled Write time (sec) Sync time (sec) Total time (sec) Count Avg time (sec) Written buffers Write time (sec) Sync time (sec) Total time (sec) VACUUMs ANALYZEs
$zday$h", &comma_numbers($tempfile_info{chronos}{$d}{$h}{count} || 0), "$temp_average", &comma_numbers($checkpoint_info{chronos}{$d}{$h}{wbuffer}) || 0, "", &comma_numbers($checkpoint_info{chronos}{$d}{$h}{file_added}) || 0, "", &comma_numbers($checkpoint_info{chronos}{$d}{$h}{file_removed}) || 0, "", &comma_numbers($checkpoint_info{chronos}{$d}{$h}{file_recycled}) || 0, "", &comma_numbers($checkpoint_info{chronos}{$d}{$h}{write}) || 0, "", &comma_numbers($checkpoint_info{chronos}{$d}{$h}{sync}) || 0, "", &comma_numbers($checkpoint_info{chronos}{$d}{$h}{total}) || 0, "0000000", &comma_numbers($checkpoint_info{chronos}{$d}{$h}{warning}) || 0, "", &comma_numbers( sprintf( "%.2f", ($checkpoint_info{chronos}{$d}{$h}{warning_seconds} || 0) / ($checkpoint_info{chronos}{$d}{$h}{warning} || 1) ) ) || 0, "00", &comma_numbers($restartpoint_info{chronos}{$d}{$h}{wbuffer}) || 0, "", &comma_numbers($restartpoint_info{chronos}{$d}{$h}{write}) || 0, "", &comma_numbers($restartpoint_info{chronos}{$d}{$h}{sync}) || 0, "", &comma_numbers($restartpoint_info{chronos}{$d}{$h}{total}) || 0, "0000", &comma_numbers($autovacuum_info{chronos}{"$d"}{"$h"}{count} || 0), "", &comma_numbers($autoanalyze_info{chronos}{"$d"}{"$h"}{count} || 0), "
\n"; } } if (!$disable_hourly && $graph) { # checkpoint size if (exists $checkpoint_info{chronos} && $checkpoint_info{wbuffer}) { foreach my $tm (sort {$a <=> $b} keys %{$checkpoint_info{chronos}}) { $tm =~ /(\d{4})(\d{2})(\d{2})/; my $y = $1 - 1900; my $mo = $2 - 1; my $d = $3; foreach my $h ("00" .. "23") { my $t = timegm_nocheck(0, 0, $h, $d, $mo, $y) * 1000; next if ($t < $t_min_hour); last if ($t > $t_max_hour); $d1 .= "[$t, " . ($checkpoint_info{chronos}{$tm}{$h}{wbuffer} || 0) . "],"; } } $d1 =~ s/,$//; &flotr2_graph( 6, 'checkpointwritebuffers_graph', $d1, '', '', 'Checkpoint write buffers', 'Buffers', 'Write buffers', '', '' ); $d1 = ''; foreach my $tm (sort {$a <=> $b} keys %{$checkpoint_info{chronos}}) { $tm =~ /(\d{4})(\d{2})(\d{2})/; my $y = $1 - 1900; my $mo = $2 - 1; my $d = $3; foreach my $h ("00" .. "23") { my $t = timegm_nocheck(0, 0, $h, $d, $mo, $y) * 1000; next if ($t < $t_min_hour); last if ($t > $t_max_hour); $d1 .= "[$t, " . ($checkpoint_info{chronos}{$tm}{$h}{file_added} || 0) . "],"; $d2 .= "[$t, " . ($checkpoint_info{chronos}{$tm}{$h}{file_removed} || 0) . "],"; $d3 .= "[$t, " . ($checkpoint_info{chronos}{$tm}{$h}{file_recycled} || 0) . "],"; } } $d1 =~ s/,$//; $d2 =~ s/,$//; $d3 =~ s/,$//; &flotr2_graph( 7, 'checkpointfiles_graph', $d1, $d2, $d3, 'Checkpoint Wal files usage', 'Number of files', 'Added', 'Removed', 'Recycled' ); $d1 = ''; $d2 = ''; $d3 = ''; } # restartpoint size if (exists $restartpoint_info{chronos} && $restartpoint_info{wbuffer}) { foreach my $tm (sort {$a <=> $b} keys %{$restartpoint_info{chronos}}) { $tm =~ /(\d{4})(\d{2})(\d{2})/; my $y = $1 - 1900; my $mo = $2 - 1; my $d = $3; foreach my $h ("00" .. "23") { my $t = timegm_nocheck(0, 0, $h, $d, $mo, $y) * 1000; next if ($t < $t_min_hour); last if ($t > $t_max_hour); $d1 .= "[$t, " . ($restartpoint_info{chronos}{$tm}{$h}{wbuffer} || 0) . "],"; } } $d1 =~ s/,$//; &flotr2_graph( 6, 'restartpointwritebuffers_graph', $d1, '', '', 'Restartpoint write buffers', 'Buffers', 'Write buffers', '', '' ); $d1 = ''; } # Temporary file size if (exists $tempfile_info{chronos}) { foreach my $tm (sort {$a <=> $b} keys %{$tempfile_info{chronos}}) { $tm =~ /(\d{4})(\d{2})(\d{2})/; my $y = $1 - 1900; my $mo = $2 - 1; my $d = $3; foreach my $h ("00" .. "23") { my $t = timegm_nocheck(0, 0, $h, $d, $mo, $y) * 1000; next if ($t < $t_min_hour); last if ($t > $t_max_hour); $d1 .= "[$t, " . ($tempfile_info{chronos}{$tm}{$h}{size} || 0) . "],"; $d2 .= "[$t, " . ($tempfile_info{chronos}{$tm}{$h}{count} || 0) . "],"; } } $d1 =~ s/,$//; $d2 =~ s/,$//; &flotr2_graph( 8, 'temporaryfile_graph', $d1, '', '', 'Temporary files', 'Size of files', 'Size of files', '', '', 'Number of files', $d2, 'Number of files' ); $d1 = ''; $d2 = ''; } # VACUUMs and ANALYZEs if (exists $autovacuum_info{chronos}) { foreach my $tm (sort {$a <=> $b} keys %{$autovacuum_info{chronos}}) { $tm =~ /(\d{4})(\d{2})(\d{2})/; my $y = $1 - 1900; my $mo = $2 - 1; my $d = $3; foreach my $h ("00" .. "23") { my $t = timegm_nocheck(0, 0, $h, $d, $mo, $y) * 1000; next if ($t < $t_min_hour); last if ($t > $t_max_hour); $d1 .= "[$t, " . ($autovacuum_info{chronos}{$tm}{$h}{count} || 0) . "],"; $d2 .= "[$t, " . ($autoanalyze_info{chronos}{$tm}{$h}{count} || 0) . "],"; } } $d1 =~ s/,$//; $d2 =~ s/,$//; &flotr2_graph( 9, 'autovacuum_graph', $d1, $d2, '', 'Autovacuum actions', '', 'VACUUMs', 'ANALYZEs' ); $d1 = ''; $d2 = ''; } } if ($graph) { # VACUUM stats per table if ($autovacuum_info{count} > 0) { print $fh qq{

VACUUMs by table ^

}; my $total_count = 0; my $total_idxscan = 0; my $total_tuples = 0; my $total_pages = 0; foreach my $t (sort keys %{$autovacuum_info{tables}}) { print $fh "\n"; $total_count += $autovacuum_info{tables}{$t}{vacuums}; $total_idxscan += $autovacuum_info{tables}{$t}{idxscans}; $total_tuples += $autovacuum_info{tables}{$t}{tuples}{removed}; $total_pages += $autovacuum_info{tables}{$t}{pages}{removed}; } print $fh "\n"; print $fh "
Table VACUUMs Index scans Tuples removed Pages removed
", $t, "", $autovacuum_info{tables}{$t}{vacuums}, "", $autovacuum_info{tables}{$t}{idxscans}, "", $autovacuum_info{tables}{$t}{tuples}{removed}, "", $autovacuum_info{tables}{$t}{pages}{removed}, "
Total", $total_count, "", $total_idxscan, "", $total_tuples, "", $total_pages, "
\n"; if ($graph && $total_count) { my %data = (); foreach my $t (sort keys %{$autovacuum_info{tables}}) { if ((($autovacuum_info{tables}{$t}{vacuums} * 100) / $total_count) > $pie_percentage_limit) { $data{$t} = $autovacuum_info{tables}{$t}{vacuums} || 0; } else { $data{"Others"} += $autovacuum_info{tables}{$t}{vacuums} || 0; } } &flotr2_piegraph(18, 'autovacuumbytable_graph', 'Autovacuum per table', %data); %data = (); if ($total_tuples) { print $fh "
\n"; foreach my $t (sort keys %{$autovacuum_info{tables}}) { if ((($autovacuum_info{tables}{$t}{tuples}{removed} * 100) / $total_tuples) > $pie_percentage_limit) { $data{$t} = $autovacuum_info{tables}{$t}{tuples}{removed} || 0; } else { $data{"Others"} += $autovacuum_info{tables}{$t}{tuples}{removed} || 0; } } &flotr2_piegraph(19, 'autovacuumtuplesremoved_graph', 'Autovacuum tuples removed per table', %data); } } print $fh "
\n"; } # ANALYZE stats per table if ($autoanalyze_info{count} > 0) { print $fh qq{

ANALYZEs by table ^

}; my $total_count = 0; my $total_idxscan = 0; foreach my $t (sort keys %{$autoanalyze_info{tables}}) { print $fh "\n"; $total_count += $autoanalyze_info{tables}{$t}{analyzes}; } print $fh "\n"; print $fh "
Table ANALYZEs
", $t, "", $autoanalyze_info{tables}{$t}{analyzes}, "
Total", $total_count, "
\n"; } } # INSERT/DELETE/UPDATE/SELECT repartition $overall_stat{'SELECT'} ||= 0; $overall_stat{'INSERT'} ||= 0; $overall_stat{'UPDATE'} ||= 0; $overall_stat{'DELETE'} ||= 0; my $totala = $overall_stat{'SELECT'} + $overall_stat{'INSERT'} + $overall_stat{'UPDATE'} + $overall_stat{'DELETE'}; if (!$disable_type && $totala) { print $fh qq{

Queries by type ^

}; my $total = $overall_stat{'queries_number'} || 1; print $fh "\n"; print $fh "\n"; print $fh "\n"; print $fh "\n"; print $fh "\n" if (($total - $totala) > 0); print $fh "
Type Count Percentage
SELECT", &comma_numbers($overall_stat{'SELECT'}), "", sprintf("%0.2f", ($overall_stat{'SELECT'} * 100) / $total), "%
INSERT", &comma_numbers($overall_stat{'INSERT'}), "", sprintf("%0.2f", ($overall_stat{'INSERT'} * 100) / $total), "%
UPDATE", &comma_numbers($overall_stat{'UPDATE'}), "", sprintf("%0.2f", ($overall_stat{'UPDATE'} * 100) / $total), "%
DELETE", &comma_numbers($overall_stat{'DELETE'}), "", sprintf("%0.2f", ($overall_stat{'DELETE'} * 100) / $total), "%
OTHERS", &comma_numbers($total - $totala), "", sprintf("%0.2f", (($total - $totala) * 100) / $total), "%
\n"; if ($graph && $totala) { my %data = (); foreach my $t ('SELECT', 'INSERT', 'UPDATE', 'DELETE') { if ((($overall_stat{$t} * 100) / $total) > $pie_percentage_limit) { $data{$t} = $overall_stat{$t} || 0; } else { $data{"Sum types < $pie_percentage_limit%"} += $overall_stat{$t} || 0; } } if (((($total - $totala) * 100) / $total) > $pie_percentage_limit) { $data{'Others'} = $total - $totala; } else { $data{"Sum types < $pie_percentage_limit%"} += $total - $totala; } &flotr2_piegraph(22, 'queriesbytype_graph', 'Type of queries', %data); } print $fh "
\n"; # Show request per database statistics if (scalar keys %database_info > 1) { print $fh qq{

Queries per database ^

}; my $total_count = 0; foreach my $d (sort keys %database_info) { print $fh "\n"; $total_count += $database_info{$d}{count}; foreach my $r (sort keys %{$database_info{$d}}) { next if ($r eq 'count'); print $fh "\n"; } } print $fh "
Database Request type Count
$d", &comma_numbers($database_info{$d}{count}), "
$r", &comma_numbers($database_info{$d}{$r}), "
\n"; if ($graph && $total_count) { my %infos = (); my @small = (); foreach my $d (sort keys %database_info) { if ((($database_info{$d}{count} * 100) / $total_count) > $pie_percentage_limit) { $infos{$d} = $database_info{$d}{count} || 0; } else { $infos{"Sum databases < $pie_percentage_limit%"} += $database_info{$d}{count} || 0; push(@small, $d); } } if ($#small == 0) { $infos{$small[0]} = $infos{"Sum databases < $pie_percentage_limit%"}; delete $infos{"Sum databases < $pie_percentage_limit%"}; } &flotr2_piegraph(20, 'requestsdatabases_graph', 'Queries per database', %infos); } print $fh "
\n"; } # Show request per application statistics if (scalar keys %application_info > 1) { print $fh qq{

Queries per application ^

}; my $total_count = 0; foreach my $d (sort keys %application_info) { print $fh "\n"; $total_count += $application_info{$d}{count}; foreach my $r (sort keys %{$application_info{$d}}) { next if ($r eq 'count'); print $fh "\n"; } } print $fh "
Database Request type Count
$d", &comma_numbers($application_info{$d}{count}), "
$r", &comma_numbers($application_info{$d}{$r}), "
\n"; if ($graph && $total_count) { my %infos = (); my @small = (); foreach my $d (sort keys %application_info) { if ((($application_info{$d}{count} * 100) / $total_count) > $pie_percentage_limit) { $infos{$d} = $application_info{$d}{count} || 0; } else { $infos{"Sum applications < $pie_percentage_limit%"} += $application_info{$d}{count} || 0; push(@small, $d); } } if ($#small == 0) { $infos{$small[0]} = $infos{"Sum applications < $pie_percentage_limit%"}; delete $infos{"Sum applications < $pie_percentage_limit%"}; } &flotr2_piegraph(21, 'requestsapplications_graph', 'Queries per application', %infos); } print $fh "
\n"; } } # Lock stats per type if (!$disable_lock && scalar keys %lock_info > 0) { print $fh qq{

Locks by type ^

}; my $total_count = 0; my $total_duration = 0; foreach my $t (sort keys %lock_info) { print $fh "\n"; foreach my $o (sort keys %{$lock_info{$t}}) { next if (($o eq 'count') || ($o eq 'duration') || ($o eq 'chronos')); print $fh "\n"; } $total_count += $lock_info{$t}{count}; $total_duration += $lock_info{$t}{duration}; } print $fh "\n"; print $fh "
Type Object Count Total Duration Avg duration (s)
$t", &comma_numbers($lock_info{$t}{count}), "", &convert_time($lock_info{$t}{duration}), "", &convert_time($lock_info{$t}{duration} / $lock_info{$t}{count}), "
$o", &comma_numbers($lock_info{$t}{$o}{count}), "", &convert_time($lock_info{$t}{$o}{duration}), "", &convert_time($lock_info{$t}{$o}{duration} / $lock_info{$t}{$o}{count}), "
Total", &comma_numbers($total_count), "", &convert_time($total_duration), "", &convert_time($total_duration / ($total_count || 1)), "
\n"; if ($graph && $total_count) { my %locktype = (); my @small = (); foreach my $d (sort keys %lock_info) { if ((($lock_info{$d}{count} * 100) / $total_count) > $pie_percentage_limit) { $locktype{$d} = $lock_info{$d}{count} || 0; } else { $locktype{"Sum types < $pie_percentage_limit%"} += $lock_info{$d}{count} || 0; push(@small, $d); } } if ($#small == 0) { $locktype{$small[0]} = $locktype{"Sum types < $pie_percentage_limit%"}; delete $locktype{"Sum types < $pie_percentage_limit%"}; } &flotr2_piegraph(10, 'lockbytype_graph', 'Type of locks', %locktype); } print $fh "
\n"; } # Show session per database statistics if (!$disable_session && exists $session_info{database}) { print $fh qq{

Sessions per database ^

}; my $total_count = 0; my $c = 0; foreach my $d (sort keys %{$session_info{database}}) { my $colb = $c % 2; print $fh "\n"; $total_count += $session_info{database}{$d}{count}; $c++; } print $fh "
Database Count Total Duration Avg duration (s)
$d", &comma_numbers($session_info{database}{$d}{count}), "", &convert_time($session_info{database}{$d}{duration}), "", &convert_time($session_info{database}{$d}{duration} / $session_info{database}{$d}{count}), "
\n"; if ($graph && $total_count) { my %infos = (); my @small = (); foreach my $d (sort keys %{$session_info{database}}) { if ((($session_info{database}{$d}{count} * 100) / $total_count) > $pie_percentage_limit) { $infos{$d} = $session_info{database}{$d}{count} || 0; } else { $infos{"Sum sessions < $pie_percentage_limit%"} += $session_info{database}{$d}{count} || 0; push(@small, $d); } } if ($#small == 0) { $infos{$small[0]} = $infos{"Sum sessions < $pie_percentage_limit%"}; delete $infos{"Sum sessions < $pie_percentage_limit%"}; } &flotr2_piegraph(11, 'databasesessions_graph', 'Sessions per database', %infos); } print $fh "
\n"; } # Show session per user statistics if (!$disable_session && exists $session_info{user}) { print $fh qq{

Sessions per user ^

}; my $total_count = 0; my $c = 0; foreach my $d (sort keys %{$session_info{user}}) { my $colb = $c % 2; $total_count += $session_info{user}{$d}{count}; print $fh "\n"; $c++; } print $fh "
User Count Total Duration Avg duration (s)
$d", &comma_numbers($session_info{user}{$d}{count}), "", &convert_time($session_info{user}{$d}{duration}), "", &convert_time($session_info{user}{$d}{duration} / $session_info{user}{$d}{count}), "
\n"; if ($graph && $total_count) { my %infos = (); my @small = (); foreach my $d (sort keys %{$session_info{user}}) { if ((($session_info{user}{$d}{count} * 100) / $total_count) > $pie_percentage_limit) { $infos{$d} = $session_info{user}{$d}{count} || 0; } else { $infos{"Sum sessions < $pie_percentage_limit%"} += $session_info{user}{$d}{count} || 0; push(@small, $d); } } if ($#small == 0) { $infos{$small[0]} = $infos{"Sum sessions < $pie_percentage_limit%"}; delete $infos{"Sum sessions < $pie_percentage_limit%"}; } &flotr2_piegraph(12, 'usersessions_graph', 'Sessions per user', %infos); } print $fh "
\n"; } # Show session per host statistics if (!$disable_session && exists $session_info{host}) { print $fh qq{

Sessions per host ^

}; my $total_count = 0; my $c = 0; foreach my $d (sort keys %{$session_info{host}}) { my $colb = $c % 2; $total_count += $session_info{host}{$d}{count}; print $fh "\n"; $c++; } print $fh "
Host Count Total Duration Avg duration (s)
$d", &comma_numbers($session_info{host}{$d}{count}), "", &convert_time($session_info{host}{$d}{duration}), "", &convert_time($session_info{host}{$d}{duration} / $session_info{host}{$d}{count}), "
\n"; if ($graph && $total_count) { my %infos = (); my @small = (); foreach my $d (sort keys %{$session_info{host}}) { if ((($session_info{host}{$d}{count} * 100) / $total_count) > $pie_percentage_limit) { $infos{$d} = $session_info{host}{$d}{count} || 0; } else { $infos{"Sum sessions < $pie_percentage_limit%"} += $session_info{host}{$d}{count} || 0; push(@small, $d); } } if ($#small == 0) { $infos{$small[0]} = $infos{"Sum sessions < $pie_percentage_limit%"}; delete $infos{"Sum sessions < $pie_percentage_limit%"}; } &flotr2_piegraph(13, 'hostsessions_graph', 'Sessions per host', %infos); } print $fh "
\n"; } # Show connection per database statistics if (!$disable_connection && exists $connection_info{database}) { print $fh qq{

Connections per database ^

}; my $total_count = 0; foreach my $d (sort keys %{$connection_info{database}}) { print $fh "\n"; $total_count += $connection_info{database}{$d}; foreach my $u (sort keys %{$connection_info{user}}) { next if (!exists $connection_info{database_user}{$d}{$u}); print $fh "\n"; } } print $fh "
Database User Count
$d", &comma_numbers($connection_info{database}{$d}), "
$u", &comma_numbers($connection_info{database_user}{$d}{$u}), "
\n"; if ($graph && $total_count) { my %infos = (); my @small = (); foreach my $d (sort keys %{$connection_info{database}}) { if ((($connection_info{database}{$d} * 100) / $total_count) > $pie_percentage_limit) { $infos{$d} = $connection_info{database}{$d} || 0; } else { $infos{"Sum connections < $pie_percentage_limit%"} += $connection_info{database}{$d} || 0; push(@small, $d); } } if ($#small == 0) { $infos{$small[0]} = $infos{"Sum connections < $pie_percentage_limit%"}; delete $infos{"Sum connections < $pie_percentage_limit%"}; } &flotr2_piegraph(14, 'databaseconnections_graph', 'Connections per database', %infos); } print $fh "
\n"; } # Show connection per user statistics if (!$disable_connection && exists $connection_info{user}) { print $fh qq{

Connections per user ^

}; my $total_count = 0; my $c = 0; foreach my $u (sort keys %{$connection_info{user}}) { my $colb = $c % 2; print $fh "\n"; $total_count += $connection_info{user}{$u}; $c++; } print $fh "
User Count
$u", &comma_numbers($connection_info{user}{$u}), "
\n"; if ($graph && $total_count) { my %infos = (); my @small = (); foreach my $d (sort keys %{$connection_info{user}}) { if ((($connection_info{user}{$d} * 100) / $total_count) > $pie_percentage_limit) { $infos{$d} = $connection_info{user}{$d} || 0; } else { $infos{"Sum connections < $pie_percentage_limit%"} += $connection_info{user}{$d} || 0; push(@small, $d); } } if ($#small == 0) { $infos{$small[0]} = $infos{"Sum connections < $pie_percentage_limit%"}; delete $infos{"Sum connections < $pie_percentage_limit%"}; } &flotr2_piegraph(15, 'userconnections_graph', 'Connections per user', %infos); } print $fh "
\n"; } # Show connection per host statistics if (!$disable_connection && exists $connection_info{host}) { print $fh qq{

Connections per host ^

}; my $total_count = 0; my $c = 0; foreach my $h (sort keys %{$connection_info{host}}) { my $colb = $c % 2; print $fh "\n"; $total_count += $connection_info{host}{$h}; $c++; } print $fh "
Host Count
$h", &comma_numbers($connection_info{host}{$h}), "
\n"; if ($graph && $total_count) { my %infos = (); my @small = (); foreach my $d (sort keys %{$connection_info{host}}) { if ((($connection_info{host}{$d} * 100) / $total_count) > $pie_percentage_limit) { $infos{$d} = $connection_info{host}{$d} || 0; } else { $infos{"Sum connections < $pie_percentage_limit%"} += $connection_info{host}{$d} || 0; push(@small, $d); } } if ($#small == 0) { $infos{$small[0]} = $infos{"Sum connections < $pie_percentage_limit%"}; delete $infos{"Sum connections < $pie_percentage_limit%"}; } &flotr2_piegraph(16, 'hostconnections_graph', 'Connections per host', %infos); } print $fh "
\n"; } # Show lock wait detailed informations if (!$disable_lock && scalar keys %lock_info > 0) { my @top_locked_queries; foreach my $h (keys %normalyzed_info) { if (exists($normalyzed_info{$h}{locks})) { push (@top_locked_queries, [$h, $normalyzed_info{$h}{locks}{count}, $normalyzed_info{$h}{locks}{wait}, $normalyzed_info{$h}{locks}{minwait}, $normalyzed_info{$h}{locks}{maxwait}]); } } # Most frequent waiting queries (N) @top_locked_queries = sort {$b->[2] <=> $a->[2]} @top_locked_queries; print $fh qq{

Most frequent waiting queries (N)^

}; my $idx = 1; for (my $i = 0 ; $i <= $#top_locked_queries ; $i++) { last if ($i > $end_top); my $col = $i % 2; print $fh "\n"; $idx++; } print $fh "
Rank Count Total wait time (s) Min/Max/Avg duration (s) Query
", $i + 1, "", $top_locked_queries[$i]->[1], "", &convert_time($top_locked_queries[$i]->[2]), "", &convert_time($top_locked_queries[$i]->[3]), "/", &convert_time($top_locked_queries[$i]->[4]), "/", &convert_time(($top_locked_queries[$i]->[4] / $top_locked_queries[$i]->[1])), "
", &highlight_code($top_locked_queries[$i]->[0]), "
\n"; my $k = $top_locked_queries[$i]->[0]; if ($normalyzed_info{$k}{count} > 1) { print $fh "
"; my $j = 0; foreach my $d (sort {$b <=> $a} keys %{$normalyzed_info{$k}{samples}}) { my $colb = $j % 2; my $db = " - database: $normalyzed_info{$k}{samples}{$d}{db}" if ($normalyzed_info{$k}{samples}{$d}{db}); $db .= ", user: $normalyzed_info{$k}{samples}{$d}{user}" if ($normalyzed_info{$k}{samples}{$d}{user}); $db .= ", remote: $normalyzed_info{$k}{samples}{$d}{remote}" if ($normalyzed_info{$k}{samples}{$d}{remote}); $db .= ", app: $normalyzed_info{$k}{samples}{$d}{app}" if ($normalyzed_info{$k}{samples}{$d}{app}); $db =~ s/^, / - /; print $fh "
", &convert_time($d), " | ", &highlight_code($normalyzed_info{$k}{samples}{$d}{query}), "
"; $j++; } print $fh "
"; } print $fh "
\n"; @top_locked_queries = (); # Queries that waited the most @top_locked_info = sort {$b->[1] <=> $a->[1]} @top_locked_info; print $fh qq{

Queries that waited the most^

}; for (my $i = 0 ; $i <= $#top_locked_info ; $i++) { my $col = $i % 2; my $ttl = $top_locked_info[$i]->[1] || ''; my $db = " - database: $top_locked_info[$i]->[3]" if ($top_locked_info[$i]->[3]); $db .= ", user: $top_locked_info[$i]->[4]" if ($top_locked_info[$i]->[4]); $db .= ", remote: $top_locked_info[$i]->[5]" if ($top_locked_info[$i]->[5]); $db .= ", app: $top_locked_info[$i]->[6]" if ($top_locked_info[$i]->[6]); $db =~ s/^, / - /; print $fh "\n"; } print $fh "
Rank Wait time (s) Query
", $i + 1, "", &convert_time($top_locked_info[$i]->[0]), "
", &highlight_code($top_locked_info[$i]->[2]), "
\n"; } # Show temporary files detailed informations if (!$disable_temporary && scalar keys %tempfile_info > 0) { my @top_temporary; foreach my $h (keys %normalyzed_info) { if (exists($normalyzed_info{$h}{tempfiles})) { push (@top_temporary, [$h, $normalyzed_info{$h}{tempfiles}{count}, $normalyzed_info{$h}{tempfiles}{size}, $normalyzed_info{$h}{tempfiles}{minsize}, $normalyzed_info{$h}{tempfiles}{maxsize}]); } } # Queries generating the most temporary files (N) @top_temporary = sort {$b->[1] <=> $a->[1]} @top_temporary; print $fh qq{

Queries generating the most temporary files (N)^

}; my $idx = 1; for (my $i = 0 ; $i <= $#top_temporary ; $i++) { last if ($i > $end_top); my $col = $i % 2; print $fh "\n"; $idx++; } print $fh "
Rank Count Total size Min/Max/Avg size Query
", $i + 1, "", $top_temporary[$i]->[1], "", &comma_numbers($top_temporary[$i]->[2]), "", &comma_numbers($top_temporary[$i]->[3]), "/", &comma_numbers($top_temporary[$i]->[4]), "/", &comma_numbers(sprintf("%.2f", $top_temporary[$i]->[2] / $top_temporary[$i]->[1])), "
", &highlight_code($top_temporary[$i]->[0]), "
"; my $k = $top_temporary[$i]->[0]; if ($normalyzed_info{$k}{count} > 1) { print $fh "
"; my $i = 0; foreach my $d (sort {$b <=> $a} keys %{$normalyzed_info{$k}{samples}}) { my $colb = $i % 2; my $db = " - database: $normalyzed_info{$k}{samples}{$d}{db}" if ($normalyzed_info{$k}{samples}{$d}{db}); $db .= ", user: $normalyzed_info{$k}{samples}{$d}{user}" if ($normalyzed_info{$k}{samples}{$d}{user}); $db .= ", remote: $normalyzed_info{$k}{samples}{$d}{remote}" if ($normalyzed_info{$k}{samples}{$d}{remote}); $db .= ", app: $normalyzed_info{$k}{samples}{$d}{app}" if ($normalyzed_info{$k}{samples}{$d}{app}); $db =~ s/^, / - /; print $fh "
", &convert_time($d), " | ", &highlight_code($normalyzed_info{$k}{samples}{$d}{query}), "
"; $i++; } print $fh "
"; } print $fh "
\n"; @top_temporary = (); # Top queries generating the largest temporary files @top_tempfile_info = sort {$b->[1] <=> $a->[1]} @top_tempfile_info; print $fh qq{

Queries generating the largest temporary files^

}; for (my $i = 0 ; $i <= $#top_tempfile_info ; $i++) { my $col = $i % 2; my $ttl = $top_tempfile_info[$i]->[1] || ''; my $db = " - database: $top_tempfile_info[$i]->[3]" if ($top_tempfile_info[$i]->[3]); $db .= ", user: $top_tempfile_info[$i]->[4]" if ($top_tempfile_info[$i]->[4]); $db .= ", remote: $top_tempfile_info[$i]->[5]" if ($top_tempfile_info[$i]->[5]); $db .= ", app: $top_tempfile_info[$i]->[6]" if ($top_tempfile_info[$i]->[6]); $db =~ s/^, / - /; print $fh "\n"; } print $fh "
Rank Size Query
", $i + 1, "", &comma_numbers($top_tempfile_info[$i]->[0]), "
", &highlight_code($top_tempfile_info[$i]->[2]), "
\n"; } # Show top information if (!$disable_query && ($#top_slowest >= 0)) { print $fh qq{

Slowest queries ^

}; for (my $i = 0 ; $i <= $#top_slowest ; $i++) { my $col = $i % 2; my $ttl = $top_slowest[$i]->[1] || ''; my $db = " - database: $top_slowest[$i]->[3]" if ($top_slowest[$i]->[3]); $db .= ", user: $top_slowest[$i]->[4]" if ($top_slowest[$i]->[4]); $db .= ", remote: $top_slowest[$i]->[5]" if ($top_slowest[$i]->[5]); $db .= ", app: $top_slowest[$i]->[6]" if ($top_slowest[$i]->[6]); $db =~ s/^, / - /; print $fh "\n"; } print $fh "
Rank Duration (s) Query
", $i + 1, "", &convert_time($top_slowest[$i]->[0]), "
", &highlight_code($top_slowest[$i]->[2]), "
\n"; print $fh qq{

Queries that took up the most time (N) ^

}; my $idx = 1; foreach my $k (sort {$normalyzed_info{$b}{duration} <=> $normalyzed_info{$a}{duration}} keys %normalyzed_info) { next if (!$normalyzed_info{$k}{count}); last if ($idx > $top); my $q = $k; if ($normalyzed_info{$k}{count} == 1) { foreach my $d (sort {$b <=> $a} keys %{$normalyzed_info{$k}{samples}}) { $q = $normalyzed_info{$k}{samples}{$d}{query}; last; } } $normalyzed_info{$k}{average} = $normalyzed_info{$k}{duration} / $normalyzed_info{$k}{count}; my $col = $idx % 2; print $fh ""; print $fh "\n"; $idx++; } print $fh "
Rank Total duration Times executed Min/Max/Avg duration (s) Query
$idx", &convert_time($normalyzed_info{$k}{duration}), "
", &comma_numbers($normalyzed_info{$k}{count}), "
"; foreach my $d (sort keys %{$normalyzed_info{$k}{chronos}}) { my $c = 1; $d =~ /^\d{4}(\d{2})(\d{2})$/; my $zday = "$abbr_month{$1} $2"; foreach my $h (sort keys %{$normalyzed_info{$k}{chronos}{$d}}) { $normalyzed_info{$k}{chronos}{$d}{$h}{average} = $normalyzed_info{$k}{chronos}{$d}{$h}{duration} / $normalyzed_info{$k}{chronos}{$d}{$h}{count}; my $colb = $c % 2; $zday = " " if ($c > 1); print $fh ""; $c++; } } print $fh "
DayHourCountDurationAvg Duration
$zday$h", &comma_numbers($normalyzed_info{$k}{chronos}{$d}{$h}{count}), "", &convert_time($normalyzed_info{$k}{chronos}{$d}{$h}{duration}), "", &convert_time($normalyzed_info{$k}{chronos}{$d}{$h}{average}), "
", &convert_time($normalyzed_info{$k}{min}),"/", &convert_time($normalyzed_info{$k}{max}),"/", &convert_time($normalyzed_info{$k}{average}), "
", &highlight_code($q), "
"; if ($normalyzed_info{$k}{count} > 1) { print $fh "
"; my $i = 0; foreach my $d (sort {$b <=> $a} keys %{$normalyzed_info{$k}{samples}}) { my $colb = $i % 2; my $db = " - database: $normalyzed_info{$k}{samples}{$d}{db}" if ($normalyzed_info{$k}{samples}{$d}{db}); $db .= ", user: $normalyzed_info{$k}{samples}{$d}{user}" if ($normalyzed_info{$k}{samples}{$d}{user}); $db .= ", remote: $normalyzed_info{$k}{samples}{$d}{remote}" if ($normalyzed_info{$k}{samples}{$d}{remote}); $db .= ", app: $normalyzed_info{$k}{samples}{$d}{app}" if ($normalyzed_info{$k}{samples}{$d}{app}); $db =~ s/^, / - /; print $fh "
", &convert_time($d), " | ", &highlight_code($normalyzed_info{$k}{samples}{$d}{query}), "
"; $i++; } print $fh "
"; } print $fh "
\n"; } if (!$disable_query && (scalar keys %normalyzed_info > 0)) { print $fh qq{

Most frequent queries (N) ^

}; my $idx = 1; foreach my $k (sort {$normalyzed_info{$b}{count} <=> $normalyzed_info{$a}{count}} keys %normalyzed_info) { next if (!$normalyzed_info{$k}{count}); last if ($idx > $top); my $q = $k; if ($normalyzed_info{$k}{count} == 1) { foreach my $d (sort {$b <=> $a} keys %{$normalyzed_info{$k}{samples}}) { $q = $normalyzed_info{$k}{samples}{$d}{query}; last; } } my $col = $idx % 2; print $fh ""; print $fh "\n"; $idx++; } print $fh "
Rank Times executed Total duration Min/Max/Avg duration (s) Query
$idx
", &comma_numbers($normalyzed_info{$k}{count}), "
"; foreach my $d (sort keys %{$normalyzed_info{$k}{chronos}}) { my $c = 1; $d =~ /^\d{4}(\d{2})(\d{2})$/; my $zday = "$abbr_month{$1} $2"; foreach my $h (sort keys %{$normalyzed_info{$k}{chronos}{$d}}) { $normalyzed_info{$k}{chronos}{$d}{$h}{average} = $normalyzed_info{$k}{chronos}{$d}{$h}{duration} / $normalyzed_info{$k}{chronos}{$d}{$h}{count}; my $colb = $c % 2; $zday = " " if ($c > 1); print $fh ""; $c++; } } print $fh "
DayHourCountDurationAvg Duration
$zday$h", &comma_numbers($normalyzed_info{$k}{chronos}{$d}{$h}{count}), "", &convert_time($normalyzed_info{$k}{chronos}{$d}{$h}{duration}), "", &convert_time($normalyzed_info{$k}{chronos}{$d}{$h}{average}), "
", &convert_time($normalyzed_info{$k}{duration}), "",&convert_time($normalyzed_info{$k}{min}),"/",&convert_time($normalyzed_info{$k}{max}),"/", &convert_time($normalyzed_info{$k}{average}), "
", &highlight_code($q), "
"; if ($normalyzed_info{$k}{count} > 1) { print $fh "
"; my $i = 0; foreach my $d (sort {$b <=> $a} keys %{$normalyzed_info{$k}{samples}}) { my $colb = $i % 2; my $db = " - database: $normalyzed_info{$k}{samples}{$d}{db}" if ($normalyzed_info{$k}{samples}{$d}{db}); $db .= ", user: $normalyzed_info{$k}{samples}{$d}{user}" if ($normalyzed_info{$k}{samples}{$d}{user}); $db .= ", remote: $normalyzed_info{$k}{samples}{$d}{remote}" if ($normalyzed_info{$k}{samples}{$d}{remote}); $db .= ", app: $normalyzed_info{$k}{samples}{$d}{app}" if ($normalyzed_info{$k}{samples}{$d}{app}); $db =~ s/^, / - /; print $fh "
", &convert_time($d), " | ", &highlight_code($normalyzed_info{$k}{samples}{$d}{query}), "
"; $i++; } print $fh "
"; } print $fh "
\n"; } if (!$disable_query && ($#top_slowest >= 0)) { print $fh qq{

Slowest queries (N) ^

}; my $idx = 1; foreach my $k (sort {$normalyzed_info{$b}{average} <=> $normalyzed_info{$a}{average}} keys %normalyzed_info) { next if (!$k || !$normalyzed_info{$k}{count}); last if ($idx > $top); my $q = $k; if ($normalyzed_info{$k}{count} == 1) { foreach my $d (sort {$b <=> $a} keys %{$normalyzed_info{$k}{samples}}) { $q = $normalyzed_info{$k}{samples}{$d}{query}; last; } } my $col = $idx % 2; print $fh ""; print $fh "\n"; $idx++; } print $fh "
Rank Min/Max/Avg duration (s) Times executed Total duration Query
$idx", &convert_time($normalyzed_info{$k}{min}), "/", &convert_time($normalyzed_info{$k}{max}), "/", &convert_time($normalyzed_info{$k}{average}), "
", &comma_numbers($normalyzed_info{$k}{count}), "
"; foreach my $d (sort keys %{$normalyzed_info{$k}{chronos}}) { my $c = 1; $d =~ /^\d{4}(\d{2})(\d{2})$/; my $zday = "$abbr_month{$1} $2"; foreach my $h (sort keys %{$normalyzed_info{$k}{chronos}{$d}}) { $normalyzed_info{$k}{chronos}{$d}{$h}{average} = $normalyzed_info{$k}{chronos}{$d}{$h}{duration} / $normalyzed_info{$k}{chronos}{$d}{$h}{count}; my $colb = $c % 2; $zday = " " if ($c > 1); print $fh ""; $c++; } } print $fh "
DayHourCountDurationAvg Duration
$zday$h", &comma_numbers($normalyzed_info{$k}{chronos}{$d}{$h}{count}), "", &convert_time($normalyzed_info{$k}{chronos}{$d}{$h}{duration}), "", &convert_time($normalyzed_info{$k}{chronos}{$d}{$h}{average}), "
", &convert_time($normalyzed_info{$k}{duration}), "
", &highlight_code($q), "
"; if ($normalyzed_info{$k}{count} > 1) { print $fh "
"; my $i = 0; foreach my $d (sort {$b <=> $a} keys %{$normalyzed_info{$k}{samples}}) { my $colb = $i % 2; my $db = " - database: $normalyzed_info{$k}{samples}{$d}{db}" if ($normalyzed_info{$k}{samples}{$d}{db}); $db .= ", user: $normalyzed_info{$k}{samples}{$d}{user}" if ($normalyzed_info{$k}{samples}{$d}{user}); $db .= ", remote: $normalyzed_info{$k}{samples}{$d}{remote}" if ($normalyzed_info{$k}{samples}{$d}{remote}); $db .= ", app: $normalyzed_info{$k}{samples}{$d}{app}" if ($normalyzed_info{$k}{samples}{$d}{app}); $db =~ s/^, / - /; print $fh "
", &convert_time($d), " | ", &highlight_code($normalyzed_info{$k}{samples}{$d}{query}), "
"; $i++; } print $fh "
"; } print $fh "
\n"; } if (!$disable_error) { &show_error_as_html(); } # Dump the html footer &html_footer(); } sub dump_error_as_html { # Dump the html header &html_header(); # Global information my $curdate = localtime(time); my $fmt_nlines = &comma_numbers($nlines); my $total_time = timestr($td); $total_time =~ s/^([\.0-9]+) wallclock.*/$1/; $total_time = &convert_time($total_time * 1000); my $logfile_str = $log_files[0]; if ($#log_files > 0) { $logfile_str .= ', ..., ' . $log_files[-1]; } print $fh qq{
  • Generated on $curdate
  • Log file: $logfile_str
  • Parsed $fmt_nlines log entries in $total_time
  • Log start from $overall_stat{'first_log_ts'} to $overall_stat{'last_log_ts'}
}; my $fmt_errors = &comma_numbers($overall_stat{'errors_number'}) || 0; my $fmt_unique_error = &comma_numbers(scalar keys %{$overall_stat{'unique_normalized_errors'}}) || 0; print $fh qq{

Overall statistics ^

  • Number of events: $fmt_errors
  • Number of unique normalized events: $fmt_unique_error
}; &show_error_as_html(); # Dump the html footer &html_footer(); } sub show_error_as_html { return if (scalar keys %error_info == 0); print $fh qq{

Most frequent events (N) ^

}; my $idx = 1; foreach my $k (sort {$error_info{$b}{count} <=> $error_info{$a}{count}} keys %error_info) { next if (!$error_info{$k}{count}); last if ($idx > $top); my $col = $idx % 2; print $fh "\n"; if ($error_info{$k}{count} > 1) { my $msg = $k; $msg =~ s/HINT: (parameter "[^"]+" changed to)/LOG: $1/; $msg =~ s/HINT: (database system was shut down)/LOG: $1/; print $fh "\n"; $idx++; } print $fh "
Rank Times reported Error
$idx
", &comma_numbers($error_info{$k}{count}), ""; print $fh "
"; foreach my $d (sort keys %{$error_info{$k}{chronos}}) { my $c = 1; $d =~ /^\d{4}(\d{2})(\d{2})$/; my $zday = "$abbr_month{$1} $2"; foreach my $h (sort keys %{$error_info{$k}{chronos}{$d}}) { my $colb = $c % 2; $zday = " " if ($c > 1); print $fh ""; $c++; } } print $fh "
DayHourCount
$zday$h", &comma_numbers($error_info{$k}{chronos}{$d}{$h}{count}), "
$msg
"; print $fh "
"; for (my $i = 0 ; $i <= $#{$error_info{$k}{date}} ; $i++) { if ( ($error_info{$k}{error}[$i] =~ s/HINT: (parameter "[^"]+" changed to)/LOG: $1/) || ($error_info{$k}{error}[$i] =~ s/HINT: (database system was shut down)/LOG: $1/)) { $logs_type{HINT}--; $logs_type{LOG}++; } my $c = $i % 2; print $fh "
$error_info{$k}{error}[$i]
\n"; print $fh "
Detail: $error_info{$k}{detail}[$i]
\n" if ($error_info{$k}{detail}[$i]); print $fh "
Context: $error_info{$k}{context}[$i]
\n" if ($error_info{$k}{context}[$i]); print $fh "
Hint: $error_info{$k}{hint}[$i]
\n" if ($error_info{$k}{hint}[$i]); print $fh "
Statement: $error_info{$k}{statement}[$i]
\n" if ($error_info{$k}{statement}[$i]); print $fh "
Database: $error_info{$k}{db}[$i]
\n" if ($error_info{$k}{db}[$i]); } print $fh "
"; } else { if ( ($error_info{$k}{error}[0] =~ s/HINT: (parameter "[^"]+" changed to)/LOG: $1/) || ($error_info{$k}{error}[0] =~ s/HINT: (database system was shut down)/LOG: $1/)) { $logs_type{HINT}--; $logs_type{LOG}++; } print $fh "
$error_info{$k}{error}[0]
"; print $fh "
Detail: $error_info{$k}{detail}[0]
\n" if ($error_info{$k}{detail}[0]); print $fh "
Context: $error_info{$k}{context}[0]
\n" if ($error_info{$k}{context}[0]); print $fh "
Hint: $error_info{$k}{hint}[0]
\n" if ($error_info{$k}{hint}[0]); print $fh "
Statement: $error_info{$k}{statement}[0]
\n" if ($error_info{$k}{statement}[0]); print $fh "
Database: $error_info{$k}{db}[0]
\n" if ($error_info{$k}{db}[0]); } print $fh "
\n"; if (scalar keys %logs_type > 0) { # Show log types print $fh qq{

Logs per type ^

}; my $total_logs = 0; foreach my $d (sort keys %logs_type) { $total_logs += $logs_type{$d}; } my $c = 0; foreach my $d (sort keys %logs_type) { next if (!$logs_type{$d}); my $colb = $c % 2; print $fh "\n"; $c++; } print $fh "
Type Count Percentage
$d", &comma_numbers($logs_type{$d}), "", sprintf("%0.2f", ($logs_type{$d} * 100) / $total_logs), "%
\n"; if ($graph && $total_logs) { my %infos = (); my @small = (); foreach my $d (sort keys %logs_type) { if ((($logs_type{$d} * 100) / $total_logs) > $pie_percentage_limit) { $infos{$d} = $logs_type{$d} || 0; } else { $infos{"Sum log types < $pie_percentage_limit%"} += $logs_type{$d} || 0; push(@small, $d); } } if ($#small == 0) { $infos{$small[0]} = $infos{"Sum log types < $pie_percentage_limit%"}; delete $infos{"Sum log types < $pie_percentage_limit%"}; } &flotr2_piegraph(17, 'logstype_graph', 'Logs per type', %infos); } print $fh "
\n"; } } sub load_stats { my $fd = shift; my %stats = %{ fd_retrieve($fd) }; my %_overall_stat = %{$stats{overall_stat}}; my %_normalyzed_info = %{$stats{normalyzed_info}}; my %_error_info = %{$stats{error_info}}; my %_connection_info = %{$stats{connection_info}}; my %_database_info = %{$stats{database_info}}; my %_application_info = %{$stats{application_info}}; my %_checkpoint_info = %{$stats{checkpoint_info}}; my %_restartpoint_info = %{$stats{restartpoint_info}}; my %_session_info = %{$stats{session_info}}; my %_tempfile_info = %{$stats{tempfile_info}}; my %_logs_type = %{$stats{logs_type}}; my %_lock_info = %{$stats{lock_info}}; my %_per_hour_info = %{$stats{per_hour_info}}; my %_per_minute_info = %{$stats{per_minute_info}}; my @_top_slowest = @{$stats{top_slowest}}; my $_nlines = $stats{nlines}; my $_first_log_timestamp = $stats{first_log_timestamp}; my $_last_log_timestamp = $stats{last_log_timestamp}; my @_log_files = @{$stats{log_files}}; my %_autovacuum_info = %{$stats{autovacuum_info}}; my %_autoanalyze_info = %{$stats{autoanalyze_info}}; return if (!$_overall_stat{queries_number} && !$_overall_stat{'errors_number'}); ### overall_stat ### $overall_stat{queries_number} += $_overall_stat{queries_number}; $overall_stat{'first_log_ts'} = $_overall_stat{'first_log_ts'} if not $overall_stat{'first_log_ts'} or $overall_stat{'first_log_ts'} gt $_overall_stat{'first_log_ts'}; $overall_stat{'last_log_ts'} = $_overall_stat{'last_log_ts'} if not $overall_stat{'last_log_ts'} or $overall_stat{'last_log_ts'} lt $_overall_stat{'last_log_ts'}; $overall_stat{first_query_ts} = $_overall_stat{first_query_ts} if not $overall_stat{first_query_ts} or $overall_stat{first_query_ts} gt $_overall_stat{first_query_ts}; $overall_stat{last_query_ts} = $_overall_stat{last_query_ts} if not $overall_stat{last_query_ts} or $overall_stat{last_query_ts} lt $_overall_stat{last_query_ts}; $overall_stat{errors_number} += $_overall_stat{errors_number}; $overall_stat{queries_duration} += $_overall_stat{queries_duration}; $overall_stat{DELETE} += $_overall_stat{DELETE} if exists $_overall_stat{DELETE}; $overall_stat{UPDATE} += $_overall_stat{UPDATE} if exists $_overall_stat{UPDATE}; $overall_stat{INSERT} += $_overall_stat{INSERT} if exists $_overall_stat{INSERT}; $overall_stat{SELECT} += $_overall_stat{SELECT} if exists $_overall_stat{SELECT}; foreach my $k (keys %{$_overall_stat{query_peak}}) { $overall_stat{query_peak}{$k} += $_overall_stat{query_peak}{$k}; } # FIXME == $error_info ?? 
foreach my $k (keys %{$_overall_stat{unique_normalized_errors}}) { $overall_stat{unique_normalized_errors}{$k} += $_overall_stat{unique_normalized_errors}{$k}; } $logs_type{ERROR} += $_logs_type{ERROR} if exists $_logs_type{ERROR}; $logs_type{LOG} += $_logs_type{LOG} if exists $_logs_type{LOG}; $logs_type{DETAIL} += $_logs_type{DETAIL} if exists $_logs_type{DETAIL}; $logs_type{STATEMENT} += $_logs_type{STATEMENT} if exists $_logs_type{STATEMENT}; ### database_info ### foreach my $db (keys %_database_info) { foreach my $k (keys %{ $_database_info{$db} }) { $database_info{$db}{$k} += $_database_info{$db}{$k}; } } ### application_info ### foreach my $app (keys %_application_info) { foreach my $k (keys %{ $_application_info{$app} }) { $application_info{$app}{$k} += $_application_info{$app}{$k}; } } ### connection_info ### foreach my $db (keys %{ $_connection_info{database} }) { $connection_info{database}{$db} += $_connection_info{database}{$db}; } foreach my $db (keys %{ $_connection_info{database_user} }) { foreach my $user (keys %{ $_connection_info{database_user}{$db} }) { $connection_info{database_user}{$db}{$user} += $_connection_info{database_user}{$db}{$user}; } } foreach my $user (keys %{ $_connection_info{user} }) { $connection_info{user}{$user} += $_connection_info{user}{$user}; } foreach my $host (keys %{ $_connection_info{host} }) { $connection_info{host}{$host} += $_connection_info{host}{$host}; } $connection_info{count} += $_connection_info{count}; foreach my $day (keys %{ $_connection_info{chronos} }) { foreach my $hour (keys %{ $_connection_info{chronos}{$day} }) { foreach my $db (keys %{ $_connection_info{chronos}{$day}{$hour}{database} }) { $connection_info{chronos}{$day}{$hour}{database}{$db} += $_connection_info{chronos}{$day}{$hour}{database}{$db}; } foreach my $db (keys %{ $_connection_info{chronos}{$day}{$hour}{database_user} }) { foreach my $user (keys %{ $_connection_info{chronos}{$day}{$hour}{database_user}{$db} }) { $connection_info{chronos}{$day}{$hour}{database_user}{$db}{$user} += $_connection_info{chronos}{$day}{$hour}{database_user}{$db}{$user}; } } $connection_info{chronos}{$day}{$hour}{count} += $_connection_info{chronos}{$day}{$hour}{count}; foreach my $user (keys %{ $_connection_info{chronos}{$day}{$hour}{user} }) { $connection_info{chronos}{$day}{$hour}{user}{$user} += $_connection_info{chronos}{$day}{$hour}{user}{$user}; } foreach my $host (keys %{ $_connection_info{chronos}{$day}{$hour}{host} }) { $connection_info{chronos}{$day}{$hour}{host}{$host} += $_connection_info{chronos}{$day}{$hour}{host}{$host}; } } } ### log_files ### foreach my $f (@_log_files) { push(@log_files, $f) if (!grep(m#^$f$#, @_log_files)); } ### per_hour_info ### foreach my $day (keys %_per_hour_info) { foreach my $hour (keys %{ $_per_hour_info{$day} }) { $per_hour_info{$day}{$hour}{count} += $_per_hour_info{$day}{$hour}{count}; $per_hour_info{$day}{$hour}{duration} += $_per_hour_info{$day}{$hour}{duration}; # Set min / max duration for this query if (!exists $per_hour_info{$day}{$hour}{min} || ($per_hour_info{$day}{$hour}{min} > $_per_hour_info{$day}{$hour}{min})) { $per_hour_info{$day}{$hour}{min} = $_per_hour_info{$day}{$hour}{min}; } if (!exists $per_hour_info{$day}{$hour}{max} || ($per_hour_info{$day}{$hour}{max} < $_per_hour_info{$day}{$hour}{max})) { $per_hour_info{$day}{$hour}{max} = $_per_hour_info{$day}{$hour}{max}; } if (exists $_per_hour_info{$day}{$hour}{DELETE}) { $per_hour_info{$day}{$hour}{DELETE}{count} += $_per_hour_info{$day}{$hour}{DELETE}{count}; 
$per_hour_info{$day}{$hour}{DELETE}{duration} += $_per_hour_info{$day}{$hour}{DELETE}{duration}; } if (exists $_per_hour_info{$day}{$hour}{SELECT}) { $per_hour_info{$day}{$hour}{SELECT}{count} += $_per_hour_info{$day}{$hour}{SELECT}{count}; $per_hour_info{$day}{$hour}{SELECT}{duration} += $_per_hour_info{$day}{$hour}{SELECT}{duration}; } if (exists $_per_hour_info{$day}{$hour}{INSERT}) { $per_hour_info{$day}{$hour}{INSERT}{count} += $_per_hour_info{$day}{$hour}{INSERT}{count}; $per_hour_info{$day}{$hour}{INSERT}{duration} += $_per_hour_info{$day}{$hour}{INSERT}{duration}; } if (exists $_per_hour_info{$day}{$hour}{UPDATE}) { $per_hour_info{$day}{$hour}{UPDATE}{count} += $_per_hour_info{$day}{$hour}{UPDATE}{count}; $per_hour_info{$day}{$hour}{UPDATE}{duration} += $_per_hour_info{$day}{$hour}{UPDATE}{duration}; } } } ### error_info ### foreach my $q (keys %_error_info) { $error_info{$q}{count} += $_error_info{$q}{count}; # Keep only the wanted sample number if (!exists $error_info{$q}{date} || ($#{$error_info{$q}{date}} < $sample)) { push(@{$error_info{$q}{date}}, @{$_error_info{$q}{date}}); push(@{$error_info{$q}{detail}}, @{$_error_info{$q}{detail}}); push(@{$error_info{$q}{context}}, @{$_error_info{$q}{context}}); push(@{$error_info{$q}{statement}}, @{$_error_info{$q}{statement}}); push(@{$error_info{$q}{hint}}, @{$_error_info{$q}{hint}}); push(@{$error_info{$q}{error}}, @{$_error_info{$q}{error}}); push(@{$error_info{$q}{db}}, @{$_error_info{$q}{db}}); foreach my $day (keys %{ $_error_info{$q}{chronos} }) { foreach my $hour (keys %{$_error_info{$q}{chronos}{$day}}) { $error_info{$q}{chronos}{$day}{$hour}{count} += $_error_info{$q}{chronos}{$day}{$hour}{count}; } } } } ### per_minute_info ### foreach my $day (keys %{ $_per_minute_info{connection} }) { foreach my $hour (keys %{ $_per_minute_info{connection}{$day} }) { foreach my $min (keys %{ $_per_minute_info{connection}{$day}{$hour} }) { $per_minute_info{connection}{$day}{$hour}{$min}{count} += $_per_minute_info{connection}{$day}{$hour}{$min}{count}; foreach my $sec (keys %{ $_per_minute_info{connection}{$day}{$hour}{$min}{second} }) { $per_minute_info{connection}{$day}{$hour}{$min}{second}{$sec} += $_per_minute_info{connection}{$day}{$hour}{$min}{second}{$sec}; } } } } foreach my $day (keys %{ $_per_minute_info{query} }) { foreach my $hour (keys %{ $_per_minute_info{query}{$day} }) { foreach my $min (keys %{ $_per_minute_info{query}{$day}{$hour} }) { $per_minute_info{query}{$day}{$hour}{$min}{count} += $_per_minute_info{query}{$day}{$hour}{$min}{count}; $per_minute_info{query}{$day}{$hour}{$min}{duration} += $_per_minute_info{query}{$day}{$hour}{$min}{duration}; foreach my $sec (keys %{ $_per_minute_info{query}{$day}{$hour}{$min}{second} }) { $per_minute_info{query}{$day}{$hour}{$min}{second}{$sec} += $_per_minute_info{query}{$day}{$hour}{$min}{second}{$sec}; } } } } ### lock_info ### foreach my $lock (keys %_lock_info) { $lock_info{$lock}{count} += $_lock_info{$lock}{count}; foreach my $day (keys %{ $_lock_info{chronos} }) { foreach my $hour (keys %{ $_lock_info{chronos}{$day} }) { $lock_info{chronos}{$day}{$hour}{count} += $_lock_info{chronos}{$day}{$hour}{count}; $lock_info{chronos}{$day}{$hour}{duration} += $_lock_info{chronos}{$day}{$hour}{duration}; } } $lock_info{$lock}{duration} += $_lock_info{$lock}{duration}; foreach my $type (keys %{$_lock_info{$lock}}) { next if $type =~ /^(count|chronos|duration)$/; $lock_info{$lock}{$type}{count} += $_lock_info{$lock}{$type}{count}; $lock_info{$lock}{$type}{duration} += 
$_lock_info{$lock}{$type}{duration}; } } ### nlines ### $nlines += $_nlines; ### normalyzed_info ### foreach my $stmt (keys %_normalyzed_info) { foreach my $dt (keys %{$_normalyzed_info{$stmt}{samples}} ) { $normalyzed_info{$stmt}{samples}{$dt} = $_normalyzed_info{$stmt}{samples}{$dt}; } # Keep only the top N samples my $i = 1; foreach my $k (sort {$b <=> $a} keys %{$normalyzed_info{$stmt}{samples}}) { if ($i > $sample) { delete $normalyzed_info{$stmt}{samples}{$k}; } $i++; } $normalyzed_info{$stmt}{count} += $_normalyzed_info{$stmt}{count}; # Set min / max duration for this query if (!exists $normalyzed_info{$stmt}{min} || ($normalyzed_info{$stmt}{min} > $_normalyzed_info{$stmt}{min})) { $normalyzed_info{$stmt}{min} = $_normalyzed_info{$stmt}{min}; } if (!exists $normalyzed_info{$stmt}{max} || ($normalyzed_info{$stmt}{max} < $_normalyzed_info{$stmt}{max})) { $normalyzed_info{$stmt}{max} = $_normalyzed_info{$stmt}{max}; } foreach my $day (keys %{$_normalyzed_info{$stmt}{chronos}} ) { foreach my $hour (keys %{$_normalyzed_info{$stmt}{chronos}{$day}} ) { $normalyzed_info{$stmt}{chronos}{$day}{$hour}{count} += $_normalyzed_info{$stmt}{chronos}{$day}{$hour}{count}; $normalyzed_info{$stmt}{chronos}{$day}{$hour}{duration} += $_normalyzed_info{$stmt}{chronos}{$day}{$hour}{duration}; } } $normalyzed_info{$stmt}{duration} += $_normalyzed_info{$stmt}{duration}; if (exists $_normalyzed_info{$stmt}{locks}) { $normalyzed_info{$stmt}{locks}{count} += $_normalyzed_info{$stmt}{locks}{count}; $normalyzed_info{$stmt}{locks}{wait} += $_normalyzed_info{$stmt}{locks}{wait}; if (!exists $normalyzed_info{$stmt}{locks}{minwait} || ($normalyzed_info{$stmt}{locks}{minwait} > $_normalyzed_info{$stmt}{locks}{minwait})) { $normalyzed_info{$stmt}{locks}{minwait} = $_normalyzed_info{$stmt}{locks}{minwait}; } if (!exists $normalyzed_info{$stmt}{locks}{maxwait} || ($normalyzed_info{$stmt}{locks}{maxwait} < $_normalyzed_info{$stmt}{locks}{maxwait})) { $normalyzed_info{$stmt}{locks}{maxwait} = $_normalyzed_info{$stmt}{locks}{maxwait}; } } if (exists $_normalyzed_info{$stmt}{tempfiles}) { $normalyzed_info{$stmt}{tempfiles}{count} += $_normalyzed_info{$stmt}{tempfiles}{count}; $normalyzed_info{$stmt}{tempfiles}{size} += $_normalyzed_info{$stmt}{tempfiles}{size}; if (!exists $normalyzed_info{$stmt}{tempfiles}{minsize} || ($normalyzed_info{$stmt}{tempfiles}{minsize} > $_normalyzed_info{$stmt}{tempfiles}{minsize})) { $normalyzed_info{$stmt}{tempfiles}{minsize} = $_normalyzed_info{$stmt}{tempfiles}{minsize}; } if (!exists $normalyzed_info{$stmt}{tempfiles}{maxsize} || ($normalyzed_info{$stmt}{tempfiles}{maxsize} < $_normalyzed_info{$stmt}{tempfiles}{maxsize})) { $normalyzed_info{$stmt}{tempfiles}{maxsize} = $_normalyzed_info{$stmt}{tempfiles}{maxsize}; } } } ### session_info ### foreach my $db (keys %{ $_session_info{database}}) { $session_info{database}{$db}{count} += $_session_info{database}{$db}{count}; $session_info{database}{$db}{duration} += $_session_info{database}{$db}{duration}; } $session_info{count} += $_session_info{count}; foreach my $day (keys %{ $_session_info{chronos}}) { foreach my $hour (keys %{ $_session_info{chronos}{$day}}) { $session_info{chronos}{$day}{$hour}{count} += $_session_info{chronos}{$day}{$hour}{count}; $session_info{chronos}{$day}{$hour}{duration} += $_session_info{chronos}{$day}{$hour}{duration}; } } foreach my $user (keys %{ $_session_info{user}}) { $session_info{user}{$user}{count} += $_session_info{user}{$user}{count}; $session_info{user}{$user}{duration} += 
$_session_info{user}{$user}{duration}; } $session_info{duration} += $_session_info{duration}; foreach my $host (keys %{ $_session_info{host}}) { $session_info{host}{$host}{count} += $_session_info{host}{$host}{count}; $session_info{host}{$host}{duration} += $_session_info{host}{$host}{duration}; } ### tempfile_info ### $tempfile_info{count} += $_tempfile_info{count} if defined $_tempfile_info{count}; $tempfile_info{size} += $_tempfile_info{size} if defined $_tempfile_info{size}; $tempfile_info{maxsize} = $_tempfile_info{maxsize} if defined $_tempfile_info{maxsize} and ( not defined $tempfile_info{maxsize} or $tempfile_info{maxsize} < $_tempfile_info{maxsize} ); foreach my $day ( %{ $_tempfile_info{chronos} } ) { foreach my $hour ( %{ $_tempfile_info{chronos}{$day} } ) { $tempfile_info{chronos}{$day}{$hour}{count} += $_tempfile_info{chronos}{$day}{$hour}{count} if defined $_tempfile_info{chronos}{$day}{$hour}{count}; $tempfile_info{chronos}{$day}{$hour}{size} += $_tempfile_info{chronos}{$day}{$hour}{size} if defined $_tempfile_info{chronos}{$day}{$hour}{size}; } } ### top_slowest ### my @tmp_top_slowest = sort {$b->[0] <=> $a->[0]} (@top_slowest, @_top_slowest); @top_slowest = (); for (my $i = 0; $i <= $#tmp_top_slowest; $i++) { last if ($i == $end_top); push(@top_slowest, $tmp_top_slowest[$i]); } ### checkpoint_info ### $checkpoint_info{file_removed} += $_checkpoint_info{file_removed}; $checkpoint_info{sync} += $_checkpoint_info{sync}; $checkpoint_info{wbuffer} += $_checkpoint_info{wbuffer}; $checkpoint_info{file_recycled} += $_checkpoint_info{file_recycled}; $checkpoint_info{total} += $_checkpoint_info{total}; $checkpoint_info{file_added} += $_checkpoint_info{file_added}; $checkpoint_info{write} += $_checkpoint_info{write}; foreach my $day (keys %{ $_checkpoint_info{chronos} }) { foreach my $hour (keys %{ $_checkpoint_info{chronos}{$day} }) { $checkpoint_info{chronos}{$day}{$hour}{file_removed} += $_checkpoint_info{chronos}{$day}{$hour}{file_removed}; $checkpoint_info{chronos}{$day}{$hour}{sync} += $_checkpoint_info{chronos}{$day}{$hour}{sync}; $checkpoint_info{chronos}{$day}{$hour}{wbuffer} += $_checkpoint_info{chronos}{$day}{$hour}{wbuffer}; $checkpoint_info{chronos}{$day}{$hour}{file_recycled} += $_checkpoint_info{chronos}{$day}{$hour}{file_recycled}; $checkpoint_info{chronos}{$day}{$hour}{total} += $_checkpoint_info{chronos}{$day}{$hour}{total}; $checkpoint_info{chronos}{$day}{$hour}{file_added} += $_checkpoint_info{chronos}{$day}{$hour}{file_added}; $checkpoint_info{chronos}{$day}{$hour}{write} += $_checkpoint_info{chronos}{$day}{$hour}{write}; } } ### restartpoint_info ### $restartpoint_info{sync} += $_restartpoint_info{sync}; $restartpoint_info{wbuffer} += $_restartpoint_info{wbuffer}; $restartpoint_info{total} += $_restartpoint_info{total}; $restartpoint_info{write} += $_restartpoint_info{write}; foreach my $day (keys %{ $_restartpoint_info{chronos} }) { foreach my $hour (keys %{ $_restartpoint_info{chronos}{$day} }) { $restartpoint_info{chronos}{$day}{$hour}{sync} += $_restartpoint_info{chronos}{$day}{$hour}{sync}; $restartpoint_info{chronos}{$day}{$hour}{wbuffer} += $_restartpoint_info{chronos}{$day}{$hour}{wbuffer}; $restartpoint_info{chronos}{$day}{$hour}{total} += $_restartpoint_info{chronos}{$day}{$hour}{total}; $restartpoint_info{chronos}{$day}{$hour}{write} += $_restartpoint_info{chronos}{$day}{$hour}{write}; } } #### Autovacuum infos #### $autovacuum_info{count} += $_autovacuum_info{count}; foreach my $day (keys %{ $_autovacuum_info{chronos} }) { foreach my $hour (keys %{ 
$_autovacuum_info{chronos}{$day} }) { $autovacuum_info{chronos}{$day}{$hour}{count} += $_autovacuum_info{chronos}{$day}{$hour}{count}; } } foreach my $table (keys %{ $_autovacuum_info{tables} }) { $autovacuum_info{tables}{$table}{vacuums} += $_autovacuum_info{tables}{$table}{vacuums}; $autovacuum_info{tables}{$table}{idxscans} += $_autovacuum_info{tables}{$table}{idxscans}; $autovacuum_info{tables}{$table}{tuples}{removed} += $_autovacuum_info{tables}{$table}{tuples}{removed}; $autovacuum_info{tables}{$table}{pages}{removed} += $_autovacuum_info{tables}{$table}{pages}{removed}; } #### Autoanalyze infos #### $autoanalyze_info{count} += $_autoanalyze_info{count}; foreach my $day (keys %{ $_autoanalyze_info{chronos} }) { foreach my $hour (keys %{ $_autoanalyze_info{chronos}{$day} }) { $autoanalyze_info{chronos}{$day}{$hour}{count} += $_autoanalyze_info{chronos}{$day}{$hour}{count}; } } foreach my $table (keys %{ $_autoanalyze_info{tables} }) { $autoanalyze_info{tables}{$table}{analyzes} += $_autoanalyze_info{tables}{$table}{analyzes}; } return; } sub dump_as_binary { my $lfh = shift(); store_fd({ 'overall_stat' => \%overall_stat, 'normalyzed_info' => \%normalyzed_info, 'error_info' => \%error_info, 'connection_info' => \%connection_info, 'database_info' => \%database_info, 'application_info' => \%application_info, 'checkpoint_info' => \%checkpoint_info, 'restartpoint_info' => \%restartpoint_info, 'session_info' => \%session_info, 'tempfile_info' => \%tempfile_info, 'error_info' => \%error_info, 'logs_type' => \%logs_type, 'lock_info' => \%lock_info, 'per_hour_info' => \%per_hour_info, 'per_minute_info' => \%per_minute_info, 'top_slowest' => \@top_slowest, 'nlines' => $nlines, 'log_files' => \@log_files, 'autovacuum_info' => \%autovacuum_info, 'autoanalyze_info' => \%autoanalyze_info }, $lfh) || die ("Couldn't save binary data to «$outfile»!\n"); } # Highlight SQL code sub highlight_code { my $code = shift; # Try to escape HTML code $code =~ s/<([\/a-zA-Z])\b/\<$1/sg; # Do not try to prettify queries longuer # than 10KB this will take too much time return $code if (length($code) > 10240); # prettify SQL query if (!$noprettify) { $sql_prettified->query($code); $code = $sql_prettified->beautify; } return $code if ($nohighlight); my $i = 0; my @qqcode = (); while ($code =~ s/("[^\"]*")/QQCODEY${i}A/s) { push(@qqcode, $1); $i++; } $i = 0; my @qcode = (); while ($code =~ s/('[^\']*')/QCODEY${i}B/s) { push(@qcode, $1); $i++; } foreach my $x (keys %SYMBOLS) { $code =~ s/$x/\$\$STYLESY0A\$\$$SYMBOLS{$x}\$\$STYLESY0B\$\$/gs; } for (my $x = 0 ; $x <= $#KEYWORDS1 ; $x++) { $code =~ s/\b$KEYWORDS1[$x]\b/$KEYWORDS1[$x]<\/span>/igs; $code =~ s/(?$KEYWORDS1[$x]<\/span>/igs; } for (my $x = 0 ; $x <= $#KEYWORDS2 ; $x++) { $code =~ s/(?$KEYWORDS2[$x]<\/span>/igs; } for (my $x = 0 ; $x <= $#KEYWORDS3 ; $x++) { $code =~ s/\b$KEYWORDS3[$x]\b/$KEYWORDS3[$x]<\/span>/igs; } for (my $x = 0 ; $x <= $#BRACKETS ; $x++) { $code =~ s/($BRACKETS[$x])/$1<\/span>/igs; } $code =~ s/\$\$STYLESY0A\$\$([^\$]+)\$\$STYLESY0B\$\$/$1<\/span>/gs; $code =~ s/\b(\d+)\b/$1<\/span>/igs; for (my $x = 0; $x <= $#qcode; $x++) { $code =~ s/QCODEY${x}B/$qcode[$x]/s; } for (my $x = 0; $x <= $#qqcode; $x++) { $code =~ s/QQCODEY${x}A/$qqcode[$x]/s; } $code =~ s/('[^']*')/$1<\/span>/gs; $code =~ s/(`[^`]*`)/$1<\/span>/gs; return $code; } sub compute_arg_list { # Some command lines arguments can be used multiple time or be written # as a coma separated list. 
# For example: --dbuser=postgres --dbuser=joe or --dbuser=postgres,joe # So we have to aggregate all the possible value my @tmp = (); foreach my $v (@exclude_user) { push(@tmp, split(/,/, $v)); } @exclude_user = (); push(@exclude_user, @tmp); @tmp = (); foreach my $v (@dbname) { push(@tmp, split(/,/, $v)); } @dbname = (); push(@dbname, @tmp); @tmp = (); foreach my $v (@dbuser) { push(@tmp, split(/,/, $v)); } @dbuser = (); push(@dbuser, @tmp); @tmp = (); foreach my $v (@dbclient) { push(@tmp, split(/,/, $v)); } @dbclient = (); push(@dbclient, @tmp); @tmp = (); foreach my $v (@dbappname) { push(@tmp, split(/,/, $v)); } @dbappname = (); push(@dbappname, @tmp); } sub validate_log_line { my ($t_pid) = @_; # Check user and/or database if require if ($#dbname >= 0) { # Log line do not match the required dbname if (!$prefix_vars{'t_dbname'} || !grep(/^$prefix_vars{'t_dbname'}$/i, @dbname)) { delete $cur_info{$t_pid}; return 0; } } if ($#dbuser >= 0) { # Log line do not match the required dbuser if (!$prefix_vars{'t_dbuser'} || !grep(/^$prefix_vars{'t_dbuser'}$/i, @dbuser)) { delete $cur_info{$t_pid}; return 0; } } if ($#dbclient >= 0) { # Log line does not match the required dbclient $prefix_vars{'t_client'} ||= $prefix_vars{'t_hostport'}; if (!$prefix_vars{'t_client'} || !grep(/^$prefix_vars{'t_client'}$/i, @dbclient)) { delete $cur_info{$t_pid}; return 0; } } if ($#dbappname >= 0) { # Log line does not match the required dbname if (!$prefix_vars{'t_appname'} || !grep(/^$prefix_vars{'t_appname'}$/i, @dbappname)) { delete $cur_info{$t_pid}; return 0; } } if ($#exclude_user >= 0) { # Log line matches the excluded dbuser if ($prefix_vars{'t_dbuser'} && grep(/^$prefix_vars{'t_dbuser'}$/i, @exclude_user)) { delete $cur_info{$t_pid}; return 0; } } return 1; } sub parse_log_prefix { my ($t_logprefix) = @_; # Extract user and database information from the logprefix part if ($t_logprefix) { # Search for database user if ($t_logprefix =~ $regex_prefix_dbuser) { $prefix_vars{'t_dbuser'} = $1; } # Search for database name if ($t_logprefix =~ $regex_prefix_dbname) { $prefix_vars{'t_dbname'} = $1; } } } sub parse_query { my $t_pid = $prefix_vars{'t_pid'}; # Force parameter change to be a hint message so that it can appear # in the event/error/warning messages report part. 
if ($prefix_vars{'t_loglevel'} eq 'LOG') { if ($prefix_vars{'t_query'} =~ /parameter "[^"]+" changed to "[^"]+"/) { $prefix_vars{'t_loglevel'} = 'HINT'; } elsif ($prefix_vars{'t_query'} =~ /database system was shut down at /) { $prefix_vars{'t_loglevel'} = 'HINT'; } } # Do not parse lines that are not an error like message if ($error_only && ($prefix_vars{'t_loglevel'} !~ /(WARNING|ERROR|FATAL|PANIC|DETAIL|HINT|STATEMENT|CONTEXT)/)) { if (exists $cur_info{$t_pid} && (!$prefix_vars{'t_session_line'} || ($prefix_vars{'t_session_line'} != $cur_info{$t_pid}{session}))) { &store_queries($t_pid); delete $cur_info{$t_pid}; } return; } # Do not parse lines that are an error like message if ($disable_error && ($prefix_vars{'t_loglevel'} =~ /WARNING|ERROR|FATAL|PANIC|HINT|CONTEXT|DETAIL|STATEMENT/)) { if (exists $cur_info{$t_pid} && (!$prefix_vars{'t_session_line'} || ($prefix_vars{'t_session_line'} != $cur_info{$t_pid}{session}))) { &store_queries($t_pid); delete $cur_info{$t_pid}; } return; } # Store a counter of logs type $logs_type{$prefix_vars{'t_loglevel'}}++; # Replace syslog tabulation rewrite $prefix_vars{'t_query'} =~ s/#011/\t/g if ($format =~ /syslog/); my $date_part = "$prefix_vars{'t_year'}$prefix_vars{'t_month'}$prefix_vars{'t_day'}"; # Stores lock activity if (($prefix_vars{'t_loglevel'} eq 'LOG') && ($prefix_vars{'t_query'} =~ /acquired ([^\s]+) on ([^\s]+) .* after ([0-9\.]+) ms/)) { return if ($disable_lock); $lock_info{$1}{count}++; $lock_info{$1}{duration} += $3; $lock_info{$1}{$2}{count}++; $lock_info{$1}{$2}{duration} += $3; $lock_info{$1}{chronos}{$date_part}{$prefix_vars{'t_hour'}}{count}++; $lock_info{$1}{chronos}{$date_part}{$prefix_vars{'t_hour'}}{duration}++; # Store current lock information that will be used later # when we will parse the query responsible of the locks $cur_lock_info{$t_pid}{wait} = $3; return; } # Stores query related to last lock information if (($prefix_vars{'t_loglevel'} eq 'STATEMENT') && exists $cur_lock_info{$t_pid}) { $cur_lock_info{$t_pid}{query} = $prefix_vars{'t_query'}; $cur_lock_info{$t_pid}{timestamp} = $prefix_vars{'t_timestamp'}; $cur_lock_info{$t_pid}{dbname} = $prefix_vars{'t_dbname'}; $cur_lock_info{$t_pid}{dbuser} = $prefix_vars{'t_dbuser'}; $cur_lock_info{$t_pid}{dbclient} = $prefix_vars{'t_client'}; $cur_lock_info{$t_pid}{dbappname} = $prefix_vars{'t_appname'}; $cur_lock_info{$t_pid}{timestamp} = $prefix_vars{'t_timestamp'}; return; } # Stores temporary files activity if (($prefix_vars{'t_loglevel'} eq 'LOG') && ($prefix_vars{'t_query'} =~ /temporary file: path .*, size (\d+)/)) { return if ($disable_temporary); $tempfile_info{count}++; $tempfile_info{size} += $1; $tempfile_info{chronos}{$date_part}{$prefix_vars{'t_hour'}}{count}++; $tempfile_info{chronos}{$date_part}{$prefix_vars{'t_hour'}}{size} += $1; $tempfile_info{maxsize} = $1 if ($tempfile_info{maxsize} < $1); # Store current temporary file information that will be used later # when we will parse the query responsible of the tempfile $cur_temp_info{$t_pid}{size} = $1; return; } # Stores query related to last created temporary file if (($prefix_vars{'t_loglevel'} eq 'STATEMENT') && exists $cur_temp_info{$t_pid}) { $cur_temp_info{$t_pid}{query} = $prefix_vars{'t_query'}; $cur_temp_info{$t_pid}{timestamp} = $prefix_vars{'t_timestamp'}; $cur_temp_info{$t_pid}{dbname} = $prefix_vars{'t_dbname'}; $cur_temp_info{$t_pid}{dbuser} = $prefix_vars{'t_dbuser'}; $cur_temp_info{$t_pid}{dbclient} = $prefix_vars{'t_client'}; $cur_temp_info{$t_pid}{dbappname} = $prefix_vars{'t_appname'}; 
$cur_temp_info{$t_pid}{timestamp} = $prefix_vars{'t_timestamp'}; return; } # Stores pre-connection activity if (($prefix_vars{'t_loglevel'} eq 'LOG') && ($prefix_vars{'t_query'} =~ /connection received: host=([^\s]+) port=(\d+)/)) { return if ($disable_connection); $conn_received{$t_pid} = $1; return; } # Stores connection activity if ( ($prefix_vars{'t_loglevel'} eq 'LOG') && ($prefix_vars{'t_query'} =~ /connection authorized: user=([^\s]+) database=([^\s]+)/)) { return if ($disable_connection); my $usr = $1; my $db = $2; if ($extension eq 'tsung') { $tsung_session{$prefix_vars{'t_pid'}}{connection}{database} = $db; $tsung_session{$prefix_vars{'t_pid'}}{connection}{user} = $usr; $tsung_session{$prefix_vars{'t_pid'}}{connection}{date} = $prefix_vars{'t_date'}; return; } $connection_info{count}++; $connection_info{user}{$usr}++; $connection_info{database}{$db}++; $connection_info{database_user}{$db}{$usr}++; $connection_info{chronos}{$date_part}{$prefix_vars{'t_hour'}}{count}++; $connection_info{chronos}{$date_part}{$prefix_vars{'t_hour'}}{user}{$usr}++; $connection_info{chronos}{$date_part}{$prefix_vars{'t_hour'}}{database}{$db}++; $connection_info{chronos}{$date_part}{$prefix_vars{'t_hour'}}{database_user}{$db}{$usr}++; if ($graph) { $per_minute_info{connection}{$date_part}{$prefix_vars{'t_hour'}}{"$prefix_vars{'t_min'}"}{count}++; $per_minute_info{connection}{$date_part}{$prefix_vars{'t_hour'}}{"$prefix_vars{'t_min'}"}{second} {$prefix_vars{'t_sec'}}++; } if (exists $conn_received{$t_pid}) { $connection_info{host}{$conn_received{$t_pid}}++; $connection_info{chronos}{$date_part}{$prefix_vars{'t_hour'}}{host}{$conn_received{$t_pid}}++; delete $conn_received{$t_pid}; } return; } # Store session duration if (($prefix_vars{'t_loglevel'} eq 'LOG') && ($prefix_vars{'t_query'} =~ /disconnection: session time: ([^\s]+) user=([^\s]+) database=([^\s]+) host=([^\s]+)/)) { return if ($disable_session); if ($extension eq 'tsung') { $tsung_session{$prefix_vars{'t_pid'}}{disconnection}{date} = $prefix_vars{'t_timestamp'}; } my $time = $1; my $usr = $2; my $db = $3; my $host = $4; if ($extension eq 'tsung') { &store_tsung_session($prefix_vars{'t_pid'}); return; } # Store time in millisecond $time =~ /(\d+):(\d+):(\d+\.\d+)/; $time = ($3 * 1000) + ($2 * 60 * 1000) + ($1 * 60 * 60 * 1000); $session_info{count}++; $session_info{duration} += $time; $session_info{chronos}{$date_part}{$prefix_vars{'t_hour'}}{count}++; $session_info{chronos}{$date_part}{$prefix_vars{'t_hour'}}{duration} += $time; $session_info{database}{$db}{count}++; $session_info{database}{$db}{duration} += $time; $session_info{user}{$usr}{count}++; $session_info{user}{$usr}{duration} += $time; $session_info{host}{$host}{count}++; $session_info{host}{$host}{duration} += $time; return; } # Store autovacuum information if ( ($prefix_vars{'t_loglevel'} eq 'LOG') && ($prefix_vars{'t_query'} =~ /automatic vacuum of table "([^\s]+)": index scans: (\d+)/ ) ) { return if ($disable_autovacuum); $autovacuum_info{count}++; $autovacuum_info{tables}{$1}{vacuums} += 1; $autovacuum_info{tables}{$1}{idxscans} += $2; $autovacuum_info{chronos}{$date_part}{$prefix_vars{'t_hour'}}{count}++; $cur_info{$t_pid}{vacuum} = $1; return; } if ( ($prefix_vars{'t_loglevel'} eq 'LOG') && ($prefix_vars{'t_query'} =~ /automatic analyze of table "([^\s]+)"/ ) ) { return if ($disable_autovacuum); $autoanalyze_info{count}++; $autoanalyze_info{tables}{$1}{analyzes} += 1; $autoanalyze_info{chronos}{$date_part}{$prefix_vars{'t_hour'}}{count}++; } # Store checkpoint information if 
( ($prefix_vars{'t_loglevel'} eq 'LOG') && ($prefix_vars{'t_query'} =~ /checkpoint complete: wrote (\d+) buffers \(([^\)]+)\); (\d+) transaction log file\(s\) added, (\d+) removed, (\d+) recycled; write=([0-9\.]+) s, sync=([0-9\.]+) s, total=([0-9\.]+) s/ ) ) { return if ($disable_checkpoint); $checkpoint_info{wbuffer} += $1; #$checkpoint_info{percent_wbuffer} += $2; $checkpoint_info{file_added} += $3; $checkpoint_info{file_removed} += $4; $checkpoint_info{file_recycled} += $5; $checkpoint_info{write} += $6; $checkpoint_info{sync} += $7; $checkpoint_info{total} += $8; $checkpoint_info{chronos}{$date_part}{$prefix_vars{'t_hour'}}{wbuffer} += $1; #$checkpoint_info{chronos}{$date_part}{$prefix_vars{'t_hour'}}{percent_wbuffer} += $2; $checkpoint_info{chronos}{$date_part}{$prefix_vars{'t_hour'}}{file_added} += $3; $checkpoint_info{chronos}{$date_part}{$prefix_vars{'t_hour'}}{file_removed} += $4; $checkpoint_info{chronos}{$date_part}{$prefix_vars{'t_hour'}}{file_recycled} += $5; $checkpoint_info{chronos}{$date_part}{$prefix_vars{'t_hour'}}{write} += $6; $checkpoint_info{chronos}{$date_part}{$prefix_vars{'t_hour'}}{sync} += $7; $checkpoint_info{chronos}{$date_part}{$prefix_vars{'t_hour'}}{total} += $8; return; } if ( ($prefix_vars{'t_loglevel'} eq 'LOG') && ($prefix_vars{'t_query'} =~ /checkpoints are occurring too frequently \((\d+) seconds apart\)/)) { return if ($disable_checkpoint); $checkpoint_info{warning}++; $checkpoint_info{warning_seconds} += $1; $checkpoint_info{chronos}{$date_part}{$prefix_vars{'t_hour'}}{warning}++; $checkpoint_info{chronos}{$date_part}{$prefix_vars{'t_hour'}}{warning_seconds} += $1; return; } # Store restartpoint information if ( ($prefix_vars{'t_loglevel'} eq 'LOG') && ($prefix_vars{'t_query'} =~ /restartpoint complete: wrote (\d+) buffers \(([^\)]+)\); write=([0-9\.]+) s, sync=([0-9\.]+) s, total=([0-9\.]+) s/ ) ) { return if ($disable_checkpoint); $restartpoint_info{wbuffer} += $1; #$restartpoint_info{percent_wbuffer} += $2; $restartpoint_info{write} += $3; $restartpoint_info{sync} += $4; $restartpoint_info{total} += $5; $restartpoint_info{chronos}{$date_part}{$prefix_vars{'t_hour'}}{wbuffer} += $1; #$restartpoint_info{chronos}{$date_part}{$prefix_vars{'t_hour'}}{percent_wbuffer} += $2; $restartpoint_info{chronos}{$date_part}{$prefix_vars{'t_hour'}}{write} += $3; $restartpoint_info{chronos}{$date_part}{$prefix_vars{'t_hour'}}{sync} += $4; $restartpoint_info{chronos}{$date_part}{$prefix_vars{'t_hour'}}{total} += $5; return; } # Store the detail of the error if ($cur_info{$t_pid}{loglevel} =~ /WARNING|ERROR|FATAL|PANIC/) { if ($prefix_vars{'t_loglevel'} =~ /(DETAIL|STATEMENT|CONTEXT|HINT)/) { $cur_info{$t_pid}{"\L$1\E"} .= $prefix_vars{'t_query'}; return; } } # Process current query following context if ($cur_info{$t_pid}{query}) { # Remove obsolete connection storage delete $conn_received{$cur_info{$t_pid}{pid}}; # The query is complete but we are missing some debug/info/bind parameter logs if ($cur_info{$t_pid}{loglevel} eq 'LOG') { # Apply bind parameters if any if (($prefix_vars{'t_loglevel'} eq 'DETAIL') && ($prefix_vars{'t_query'} =~ /parameters: (.*)/)) { $cur_info{$t_pid}{parameters} = "$1"; # go look at other params return; } } # When we are ready to overwrite the last storage, add it to the global stats if ( ($prefix_vars{'t_loglevel'} =~ /LOG|FATAL|PANIC|ERROR|WARNING|HINT/) && exists $cur_info{$t_pid} && (($format eq 'csv') || (!$prefix_vars{'t_session_line'} || ($prefix_vars{'t_session_line'} != $cur_info{$t_pid}{session}))) ) { &store_queries($t_pid); 
delete $cur_info{$t_pid}; } } # Registrer previous query storage into global statistics before starting to store current query if (exists $cur_info{$t_pid} && (!$prefix_vars{'t_session_line'} || ($prefix_vars{'t_session_line'} != $cur_info{$t_pid}{session}))) { &store_queries($t_pid); delete $cur_info{$t_pid}; } # Log lines with duration only, generated by log_duration = on in postgresql.conf if ($prefix_vars{'t_query'} =~ s/duration: ([0-9\.]+) ms$//s) { $prefix_vars{'t_duration'} = $1; $prefix_vars{'t_query'} = ''; &set_current_infos($t_pid); return; } # Store info as tsung session following the output file extension if (($extension eq 'tsung') && !exists $tsung_session{$prefix_vars{'t_pid'}}{connection} && $prefix_vars{'t_dbname'}) { $tsung_session{$prefix_vars{'t_pid'}}{connection}{database} = $prefix_vars{'t_dbname'}; $tsung_session{$prefix_vars{'t_pid'}}{connection}{user} = $prefix_vars{'t_dbuser'}; $tsung_session{$prefix_vars{'t_pid'}}{connection}{date} = $prefix_vars{'t_date'}; } my $t_action = ''; # Store query duration generated by log_min_duration >= 0 in postgresql.conf if ($prefix_vars{'t_query'} =~ s/duration: ([0-9\.]+) ms (query|statement): //is) { $prefix_vars{'t_duration'} = $1; $t_action = $2; # Log line with duration and statement from prepared queries } elsif ($prefix_vars{'t_query'} =~ s/duration: ([0-9\.]+) ms (prepare|parse|bind|execute|execute from fetch)\s+[^:]+:\s//is) { $prefix_vars{'t_duration'} = $1; $t_action = $2; # Skipping parse and bind logs return if ($t_action !~ /query|statement|execute/); # Log line without duration at all } elsif ($prefix_vars{'t_query'} =~ s/(query|statement): //is) { $t_action = $1; # Log line without duration at all from prepared queries } elsif ($prefix_vars{'t_query'} =~ s/(prepare|parse|bind|execute|execute from fetch)\s+[^:]+:\s//is) { $t_action = $1; # Skipping parse and bind logs return if ($t_action !~ /query|statement|execute/); # Log line that should not be parse } elsif ($prefix_vars{'t_loglevel'} eq 'LOG') { if ($prefix_vars{'t_query'} !~ /incomplete startup packet|connection|receive|unexpected EOF|still waiting for [^\s]+Lock|checkpoint starting:|could not send data to client|parameter .*configuration file|autovacuum launcher|automatic (analyze|vacuum)|detected deadlock while waiting for|database system was shut down/ ) { &logmsg('DEBUG', "Unrecognized line: $prefix_vars{'t_loglevel'}: $prefix_vars{'t_query'} at line $nlines"); } if (exists $cur_info{$t_pid} && (!$prefix_vars{'t_session_line'} || ($prefix_vars{'t_session_line'} != $cur_info{$t_pid}{session}))) { &store_queries($t_pid); delete $cur_info{$t_pid}; } return; } if ( ($format eq 'csv') && ($prefix_vars{'t_loglevel'} ne 'LOG')) { $cur_info{$t_pid}{detail} = $prefix_vars{'t_detail'}; $cur_info{$t_pid}{hint} = $prefix_vars{'t_hint'}; $cur_info{$t_pid}{context} = $prefix_vars{'t_context'}; $cur_info{$t_pid}{statement} = $prefix_vars{'t_statement'} } &set_current_infos($t_pid); return 1; } sub set_current_infos { my $t_pid = shift; $cur_info{$t_pid}{year} = $prefix_vars{'t_year'}; $cur_info{$t_pid}{month} = $prefix_vars{'t_month'}; $cur_info{$t_pid}{day} = $prefix_vars{'t_day'}; $cur_info{$t_pid}{hour} = $prefix_vars{'t_hour'}; $cur_info{$t_pid}{min} = $prefix_vars{'t_min'}; $cur_info{$t_pid}{sec} = $prefix_vars{'t_sec'}; $cur_info{$t_pid}{timestamp} = $prefix_vars{'t_timestamp'}; $cur_info{$t_pid}{ident} = $prefix_vars{'t_ident'}; $cur_info{$t_pid}{query} = $prefix_vars{'t_query'}; $cur_info{$t_pid}{duration} = $prefix_vars{'t_duration'}; $cur_info{$t_pid}{pid} 
= $prefix_vars{'t_pid'}; $cur_info{$t_pid}{session} = $prefix_vars{'t_session_line'}; $cur_info{$t_pid}{loglevel} = $prefix_vars{'t_loglevel'}; $cur_info{$t_pid}{dbname} = $prefix_vars{'t_dbname'}; $cur_info{$t_pid}{dbuser} = $prefix_vars{'t_dbuser'}; $cur_info{$t_pid}{dbclient} = $prefix_vars{'t_client'}; $cur_info{$t_pid}{dbappname} = $prefix_vars{'t_appname'}; $cur_info{$t_pid}{date} = $prefix_vars{'t_date'}; } sub store_tsung_session { my $pid = shift; return if ($#{$tsung_session{$pid}{dates}} < 0); # Open filehandle my $fh = new IO::File ">>$outfile"; if (not defined $fh) { die "FATAL: can't write to $outfile, $!\n"; } if ($pid) { print $fh " \n"; if (exists $tsung_session{$pid}{connection}{database}) { print $fh qq{ }; } if ($#{$tsung_session{$pid}{dates}} >= 0) { my $sec = 0; if ($tsung_session{$pid}{connection}{date}) { $sec = $tsung_session{$pid}{dates}[0] - $tsung_session{$pid}{connection}{date}; } print $fh " \n" if ($sec > 0); print $fh " \n"; for (my $i = 0 ; $i <= $#{$tsung_session{$pid}{queries}} ; $i++) { $tsung_queries++; $sec = 0; if ($i > 0) { $sec = $tsung_session{$pid}{dates}[$i] - $tsung_session{$pid}{dates}[$i - 1]; print $fh " \n" if ($sec > 0); } print $fh " \n"; } print $fh " \n"; } if ($#{$tsung_session{$pid}{dates}} >= 0) { my $sec = $tsung_session{$pid}{disconnection}{date} - $tsung_session{$pid}{dates}[-1]; print $fh " \n" if ($sec > 0); } if (exists $tsung_session{$pid}{connection}{database}) { print $fh " \n"; } print $fh " \n\n"; delete $tsung_session{$pid}; } $fh->close; } sub store_queries { my $t_pid = shift; # Remove comments if required if ($remove_comment) { $cur_info{$t_pid}{query} =~ s/\/\*(.*?)\*\///gs; } # Cleanup and normalize the current query $cur_info{$t_pid}{query} =~ s/^[\t\s\r\n]+//s; $cur_info{$t_pid}{query} =~ s/[\t\s\r\n;]+$//s; # Replace bind parameters values in the query if any if (exists $cur_info{$t_pid}{parameters}) { my @t_res = split(/[,\s]*\$(\d+)\s=\s/, $cur_info{$t_pid}{parameters}); shift(@t_res); for (my $i = 0 ; $i < $#t_res ; $i += 2) { $cur_info{$t_pid}{query} =~ s/\$$t_res[$i]\b/$t_res[$i+1]/s; } } # We only process stored object with query here if ($cur_info{$t_pid}{query}) { # Should we just want select queries if ($select_only) { return if (($cur_info{$t_pid}{query} !~ /^SELECT/is) || ($cur_info{$t_pid}{query} =~ /FOR UPDATE/is)); } # Should we have to exclude some queries if ($#exclude_query >= 0) { foreach (@exclude_query) { if ($cur_info{$t_pid}{query} =~ /$_/i) { $cur_info{$t_pid}{query} = ''; return; } } } # Should we have to include only some queries if ($#include_query >= 0) { foreach (@include_query) { if ($cur_info{$t_pid}{query} !~ /$_/i) { $cur_info{$t_pid}{query} = ''; return; } } } # Truncate the query if requested by the user $cur_info{$t_pid}{query} = substr($cur_info{$t_pid}{query}, 0, $maxlength) . 
'[...]' if (($maxlength > 0) && (length($cur_info{$t_pid}{query}) > $maxlength)); # Dump queries as tsung request and return if ($extension eq 'tsung') { if ($cur_info{$t_pid}{loglevel} eq 'LOG') { push(@{$tsung_session{$t_pid}{queries}}, $cur_info{$t_pid}{query}); push(@{$tsung_session{$t_pid}{dates}}, $cur_info{$t_pid}{date}); if (!exists $tsung_session{$t_pid}{connection} && $cur_info{$t_pid}{dbname}) { $tsung_session{$t_pid}{connection}{database} = $cur_info{$t_pid}{dbname}; $tsung_session{$t_pid}{connection}{user} = $cur_info{$t_pid}{dbuser}; $tsung_session{$t_pid}{connection}{date} = $cur_info{$t_pid}{date}; } } return; } } my $cur_day_str = "$cur_info{$t_pid}{year}$cur_info{$t_pid}{month}$cur_info{$t_pid}{day}"; my $cur_hour_str = "$cur_info{$t_pid}{hour}"; # Store the collected information into global statistics if ($cur_info{$t_pid}{loglevel} =~ /WARNING|ERROR|FATAL|PANIC|HINT/) { # Add log level at beginning of the query and normalize it $cur_info{$t_pid}{query} = $cur_info{$t_pid}{loglevel} . ": " . $cur_info{$t_pid}{query}; my $normalized_error = &normalize_error($cur_info{$t_pid}{query}); # Stores total and normalized error count $overall_stat{'errors_number'}++; $overall_stat{'unique_normalized_errors'}{"$normalized_error"}++; $error_info{$normalized_error}{count}++; # Stores normalized error count per time $error_info{$normalized_error}{chronos}{"$cur_day_str"}{"$cur_hour_str"}{count}++; # Stores normalized query samples my $cur_last_log_timestamp = "$cur_info{$t_pid}{year}-$cur_info{$t_pid}{month}-$cur_info{$t_pid}{day} " . "$cur_info{$t_pid}{hour}:$cur_info{$t_pid}{min}:$cur_info{$t_pid}{sec}"; &set_top_error_sample( $normalized_error, $cur_last_log_timestamp, $cur_info{$t_pid}{query}, $cur_info{$t_pid}{detail}, $cur_info{$t_pid}{context}, $cur_info{$t_pid}{statement}, $cur_info{$t_pid}{hint}, $cur_info{$t_pid}{dbname} ); } elsif ($cur_info{$t_pid}{loglevel} eq 'LOG') { # Stores global statistics $overall_stat{'queries_number'}++; $overall_stat{'queries_duration'} += $cur_info{$t_pid}{duration} if ($cur_info{$t_pid}{duration}); my $cur_last_log_timestamp = "$cur_info{$t_pid}{year}-$cur_info{$t_pid}{month}-$cur_info{$t_pid}{day} " . 
"$cur_info{$t_pid}{hour}:$cur_info{$t_pid}{min}:$cur_info{$t_pid}{sec}"; if (!$overall_stat{'first_query_ts'} || ($overall_stat{'first_query_ts'} gt $cur_last_log_timestamp)) { $overall_stat{'first_query_ts'} = $cur_last_log_timestamp; } if (!$overall_stat{'last_query_ts'} || ($overall_stat{'last_query_ts'} lt $cur_last_log_timestamp)) { $overall_stat{'last_query_ts'} = $cur_last_log_timestamp; } $overall_stat{'query_peak'}{$cur_last_log_timestamp}++; $per_hour_info{"$cur_day_str"}{"$cur_hour_str"}{count}++; if ($cur_info{$t_pid}{duration}) { $per_hour_info{"$cur_day_str"}{"$cur_hour_str"}{duration} += $cur_info{$t_pid}{duration}; # Store min / max duration if (!exists $per_hour_info{"$cur_day_str"}{"$cur_hour_str"}{min} || ($per_hour_info{"$cur_day_str"}{"$cur_hour_str"}{min} > $cur_info{$t_pid}{duration})) { $per_hour_info{"$cur_day_str"}{"$cur_hour_str"}{min} = $cur_info{$t_pid}{duration}; } if (!exists $per_hour_info{"$cur_day_str"}{"$cur_hour_str"}{max} || ($per_hour_info{"$cur_day_str"}{"$cur_hour_str"}{max} < $cur_info{$t_pid}{duration})) { $per_hour_info{"$cur_day_str"}{"$cur_hour_str"}{max} = $cur_info{$t_pid}{duration}; } } if ($graph) { $per_minute_info{query}{"$cur_day_str"}{"$cur_hour_str"}{$cur_info{$t_pid}{min}}{count}++; $per_minute_info{query}{"$cur_day_str"}{"$cur_hour_str"}{$cur_info{$t_pid}{min}}{second}{$cur_info{$t_pid}{sec}}++; $per_minute_info{query}{"$cur_day_str"}{"$cur_hour_str"}{$cur_info{$t_pid}{min}}{duration} += $cur_info{$t_pid}{duration} if ($cur_info{$t_pid}{duration}); } # Counter per database and application name if ($cur_info{$t_pid}{dbname}) { $database_info{$cur_info{$t_pid}{dbname}}{count}++; } if ($cur_info{$t_pid}{dbappname}) { $application_info{$cur_info{$t_pid}{dbappname}}{count}++; } else { $application_info{others}{count}++; } # Store normalized query temp file size if required if (exists $cur_temp_info{$t_pid} && ($cur_temp_info{$t_pid} ne '') ) { # Add a semi-colon at end of the query $cur_temp_info{$t_pid}{query} .= ';' if (substr($cur_temp_info{$t_pid}{query}, -1, 1) ne ';'); # Normalize query my $normalized = &normalize_query($cur_temp_info{$t_pid}{query}); $normalyzed_info{$normalized}{tempfiles}{size} += $cur_temp_info{$t_pid}{size}; $normalyzed_info{$normalized}{tempfiles}{count}++; if ($normalyzed_info{$normalized}{tempfiles}{maxsize} < $cur_temp_info{$t_pid}{size}) { $normalyzed_info{$normalized}{tempfiles}{maxsize} = $cur_temp_info{$t_pid}{size}; } if (!exists($normalyzed_info{$normalized}{tempfiles}{minsize}) || $normalyzed_info{$normalized}{tempfiles}{minsize} > $cur_temp_info{$t_pid}{size}) { $normalyzed_info{$normalized}{tempfiles}{minsize} = $cur_temp_info{$t_pid}{size}; } &set_top_tempfile_info($cur_temp_info{$t_pid}{query}, $cur_temp_info{$t_pid}{size}, $cur_temp_info{$t_pid}{timestamp}, $cur_temp_info{$t_pid}{dbname}, $cur_temp_info{$t_pid}{dbuser}, $cur_temp_info{$t_pid}{dbclient}, $cur_temp_info{$t_pid}{dbappname}); delete $cur_temp_info{$t_pid}; } # Store normalized query that waited the most if required if (exists $cur_lock_info{$t_pid}) { # Add a semi-colon at end of the query $cur_lock_info{$t_pid}{query} .= ';' if (substr($cur_lock_info{$t_pid}{query}, -1, 1) ne ';'); # Normalize query my $normalized = &normalize_query($cur_lock_info{$t_pid}{query}); $normalyzed_info{$normalized}{locks}{wait} += $cur_lock_info{$t_pid}{wait}; $normalyzed_info{$normalized}{locks}{count}++; if ($normalyzed_info{$normalized}{locks}{maxwait} < $cur_lock_info{$t_pid}{wait}) { $normalyzed_info{$normalized}{locks}{maxwait} = 
                $cur_lock_info{$t_pid}{wait};
            }
            if (!exists($normalyzed_info{$normalized}{locks}{minwait})
                || $normalyzed_info{$normalized}{locks}{minwait} > $cur_lock_info{$t_pid}{wait}) {
                $normalyzed_info{$normalized}{locks}{minwait} = $cur_lock_info{$t_pid}{wait};
            }
            &set_top_locked_info($cur_lock_info{$t_pid}{query}, $cur_lock_info{$t_pid}{wait},
                $cur_lock_info{$t_pid}{timestamp}, $cur_lock_info{$t_pid}{dbname},
                $cur_lock_info{$t_pid}{dbuser}, $cur_lock_info{$t_pid}{dbclient},
                $cur_lock_info{$t_pid}{dbappname});
            delete $cur_lock_info{$t_pid};
        }
        if ($cur_info{$t_pid}{query}) {

            # Add a semi-colon at end of the query
            $cur_info{$t_pid}{query} .= ';' if (substr($cur_info{$t_pid}{query}, -1, 1) ne ';');

            # Normalize query
            my $normalized = &normalize_query($cur_info{$t_pid}{query});

            foreach my $act (@action_regex) {
                if ($normalized =~ $act) {
                    my $action = uc($1);
                    $overall_stat{$action}++;
                    $per_hour_info{"$cur_day_str"}{"$cur_hour_str"}{$action}{count}++;
                    $per_hour_info{"$cur_day_str"}{"$cur_hour_str"}{$action}{duration} += $cur_info{$t_pid}{duration}
                        if ($cur_info{$t_pid}{duration});
                    if ($cur_info{$t_pid}{dbname}) {
                        $database_info{$cur_info{$t_pid}{dbname}}{$action}++;
                    }
                    if ($cur_info{$t_pid}{dbappname}) {
                        $application_info{$cur_info{$t_pid}{dbappname}}{$action}++;
                    } else {
                        $application_info{others}{$action}++;
                    }
                    last;
                }
            }

            # Store normalized query count
            $normalyzed_info{$normalized}{count}++;

            # Store normalized query count and duration per time
            $normalyzed_info{$normalized}{chronos}{"$cur_day_str"}{"$cur_hour_str"}{count}++;
            if ($cur_info{$t_pid}{duration}) {

                # Update top slowest queries statistics
                &set_top_slowest($cur_info{$t_pid}{query}, $cur_info{$t_pid}{duration}, $cur_last_log_timestamp,
                    $cur_info{$t_pid}{dbname}, $cur_info{$t_pid}{dbuser},
                    $cur_info{$t_pid}{dbclient},$cur_info{$t_pid}{dbappname});

                # Store normalized query total duration
                $normalyzed_info{$normalized}{duration} += $cur_info{$t_pid}{duration};

                # Store min / max duration
                if (!exists $normalyzed_info{$normalized}{min} || ($normalyzed_info{$normalized}{min} > $cur_info{$t_pid}{duration})) {
                    $normalyzed_info{$normalized}{min} = $cur_info{$t_pid}{duration};
                }
                if (!exists $normalyzed_info{$normalized}{max} || ($normalyzed_info{$normalized}{max} < $cur_info{$t_pid}{duration})) {
                    $normalyzed_info{$normalized}{max} = $cur_info{$t_pid}{duration};
                }

                # Store normalized query count and duration per time
                $normalyzed_info{$normalized}{chronos}{"$cur_day_str"}{"$cur_hour_str"}{duration} += $cur_info{$t_pid}{duration};

                # Store normalized query samples
                &set_top_sample($normalized, $cur_info{$t_pid}{query}, $cur_info{$t_pid}{duration}, $overall_stat{'last_log_ts'},
                    $cur_info{$t_pid}{dbname}, $cur_info{$t_pid}{dbuser},
                    $cur_info{$t_pid}{dbclient},$cur_info{$t_pid}{dbappname});
            }
        }
    }
}
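# Illustrative examples (not exhaustive) of what normalize_error() below does:
# changing values are collapsed so that similar errors are counted together, e.g.
#   ERROR: column "foo" does not exist at character 8
#       becomes   ERROR: column "..." does not exist
#   ERROR: duplicate key value violates unique constraint "users_pkey"
#       becomes   ERROR: duplicate key value violates unique constraint "..."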
# Normalize error messages
sub normalize_error
{
    my $orig_query = shift;

    return if (!$orig_query);

    # Remove character position
    $orig_query =~ s/ at character \d+//;

    # Remove encoding detail
    $orig_query =~ s/(byte sequence for encoding).*/$1/;

    # Replace changing parameter by ...
    $orig_query =~ s/"[^"]*"/"..."/g;
    $orig_query =~ s/\(.*\)/\(...\)/g;
    $orig_query =~ s/column .* does not exist/column "..." does not exist/;
    $orig_query =~ s/(database system was shut down at).*/$1 .../;

    # Need more normalization stuff here

    return $orig_query;
}

sub average_per_minutes
{
    my $val = shift;
    my $idx = shift;

    my @avgs = ();
    for (my $i = 0 ; $i < 59 ; $i += $idx) {
        push(@avgs, sprintf("%02d", $i));
    }
    push(@avgs, 59);

    for (my $i = 0 ; $i <= $#avgs ; $i++) {
        if ($val == $avgs[$i]) {
            return "$avgs[$i]";
        } elsif ($avgs[$i] == $avgs[-1]) {
            return "$avgs[$i-1]";
        } elsif (($val > $avgs[$i]) && ($val < $avgs[$i + 1])) {
            return "$avgs[$i]";
        }
    }

    return $val;
}

sub autodetect_format
{
    my $file = shift;

    # Open log file for reading
    my $nfound = 0;
    my $nline  = 0;
    my $fmt    = '';
    die "FATAL: can't open file $file, $!\n" unless(open(TESTFILE, $file));
    binmode(TESTFILE);
    my $fltf = <TESTFILE>;
    close(TESTFILE);

    # is file in binary format ?
    if ( $fltf =~ /^pst\d/ ) {
        $fmt = 'binary';
    }
    else {
        # try to detect syslogs or csv
        my ($tfile, $totalsize) = &get_log_file($file);
        my %ident_name = ();
        while (my $line = <$tfile>) {
            chomp($line);
            $line =~ s/\r//;
            next if (!$line);
            $nline++;

            # Are syslog lines ?
            if ($line =~
                /^[A-Z][a-z]{2}\s+\d+\s\d+:\d+:\d+(?:\s[^\s]+)?\s[^\s]+\s([^\s\[]+)\[\d+\]:(?:\s\[[^\]]+\])?\s\[\d+\-\d+\].*?(LOG|WARNING|ERROR|FATAL|PANIC|DETAIL|STATEMENT|HINT|CONTEXT):/)
            {
                $fmt = 'syslog';
                $nfound++;
                $ident_name{$1}++;
            } elsif ($line =~
                /^\d+-\d+-\d+T\d+:\d+:\d+(?:.[^\s]+)?\s[^\s]+\s([^\s\[]+)\[\d+\]:(?:\s\[[^\]]+\])?\s\[\d+\-\d+\].*?(LOG|WARNING|ERROR|FATAL|PANIC|DETAIL|STATEMENT|HINT|CONTEXT):/)
            {
                $fmt = 'syslog2';
                $nfound++;
                $ident_name{$1}++;

                # Are stderr lines ?
            } elsif (
                ($line =~
                    /^\d+-\d+-\d+ \d+:\d+:\d+\.\d+(?: [A-Z\d]{3,6})?,.*,(LOG|WARNING|ERROR|FATAL|PANIC|DETAIL|STATEMENT|HINT|CONTEXT),/)
                && ($line =~ tr/,/,/ >= 12))
            {
                $fmt = 'csv';
                $nfound++;
            } elsif ($line =~
                /\d+-\d+-\d+ \d+:\d+:\d+[\.0-9]*(?: [A-Z\d]{3,6})?(.*?)(LOG|WARNING|ERROR|FATAL|PANIC|DETAIL|STATEMENT|HINT|CONTEXT):\s+/)
            {
                $fmt = 'stderr';
                $nfound++;
            }
            last if (($nfound > 10) || ($nline > 5000));
        }
        $tfile->close();

        if (!$fmt || ($nfound < 10)) {
            die "FATAL: unable to detect log file format from $file, please use -f option.\n";
        }

        if (($fmt =~ /syslog/) && !$ident && (scalar keys %ident_name == 1)) {
            $ident = (keys %ident_name)[0];
        }
    }

    &logmsg('DEBUG', "Autodetected log format '$fmt' from $file");

    return $fmt;
}
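# Illustrative examples (made up, the exact shape depends on log_line_prefix) of
# lines that the detectors above would classify:
#   stderr : 2013-04-30 08:15:02.123 CEST [4567]: [2-1] user=postgres,db=pgbench LOG:  duration: 10.446 ms
#   syslog : Apr 30 08:15:02 dbhost postgres[4567]: [2-1] LOG:  connection received: host=[local]
#            (the syslog ident, here "postgres", is remembered when it is unique)
#   csv    : a csvlog line carrying the same fields comma separated; the csv test
#            also requires at least a dozen commas per line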
sub progress_bar
{
    my ($got, $total, $width, $char, $queries, $errors) = @_;

    $width ||= 25;
    $char  ||= '=';
    my $num_width = length $total;
    if ($extension eq 'tsung') {
        sprintf(
            "[%-${width}s] Parsed %${num_width}s bytes of %s (%.2f%%), queries: %d\r",
            $char x (($width - 1) * $got / $total) . '>',
            $got, $total, 100 * $got / +$total, ($queries || $tsung_queries)
        );
    } elsif ($format eq 'binary') {
        my $file = $_[-1];
        sprintf(
            "Loaded %d queries and %d events from binary file %s...\r",
            $overall_stat{'queries_number'}, $overall_stat{'errors_number'}, $queries
        );
    } else {
        sprintf(
            "[%-${width}s] Parsed %${num_width}s bytes of %s (%.2f%%), queries: %d, events: %d\r",
            $char x (($width - 1) * $got / $total) . '>',
            $got, $total, 100 * $got / +$total,
            ($queries || $overall_stat{'queries_number'}),
            ($errors || $overall_stat{'errors_number'})
        );
    }
}

sub flotr2_graph
{
    my ($buttonid, $divid, $data1, $data2, $data3, $title, $ytitle, $legend1, $legend2, $legend3, $ytitle2, $data4, $legend4) = @_;

    $data1 = "var d1 = [$data1];" if ($data1);
    $data2 = "var d2 = [$data2];" if ($data2);
    $data3 = "var d3 = [$data3];" if ($data3);
    $data4 = "var d4 = [$data4];" if ($data4);

    $legend1 = "{ data: d1, label: \"$legend1\" }," if ($legend1);
    $legend2 = "{ data: d2, label: \"$legend2\" }," if ($legend2);
    $legend3 = "{ data: d3, label: \"$legend3\" }," if ($legend3);
    $legend4 = "{ data: d4, label: \"$legend4\",yaxis: 2 }," if ($legend4);

    my $yaxis2 = '';
    if ($ytitle2) {
        $yaxis2 = "y2axis: { title: \"$ytitle2\", min: 0, color: \"#4DA74D\" },";
    }

    my $min = $t_min;
    my $max = $t_max;
    if ($divid !~ /persecond/) {
        $min = $t_min_hour;
        $max = $t_max_hour;
    }
    print $fh <<"EOF";
EOF
}
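# Illustrative note: each key of the %data hash passed to flotr2_piegraph() below
# becomes one labelled Flotr2 pie series, so a hypothetical call such as
#   &flotr2_piegraph($buttonid, $divid, 'Queries by type', 'SELECT' => 120, 'INSERT' => 30);
# would build the series d1/d2 labelled "INSERT" and "SELECT" (keys are sorted).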
sub flotr2_piegraph
{
    my ($buttonid, $divid, $title, %data) = @_;

    my @datadef = ();
    my @contdef = ();
    my $i       = 1;
    foreach my $k (sort keys %data) {
        push(@datadef, "var d$i = [ [0,$data{$k}] ];\n");
        push(@contdef, "{ data: d$i, label: \"$k\" },\n");
        $i++;
    }
    print $fh <<"EOF";
EOF
}

sub build_log_line_prefix_regex
{
    my %regex_map = (
        '%a' => [('t_appname', '([0-9a-zA-Z\.\-\_\/\[\]]*)')],    # application name
        '%u' => [('t_dbuser', '([0-9a-zA-Z\_\[\]\-]*)')],         # user name
        '%d' => [('t_dbname', '([0-9a-zA-Z\_\[\]\-]*)')],         # database name
        '%r' => [('t_hostport', '([a-zA-Z0-9\-\.]+|\[local\]|\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})?[\(\d\)]*')],    # remote host and port
        '%h' => [('t_client', '([a-zA-Z0-9\-\.]+|\[local\]|\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})?')],               # remote host
        '%p' => [('t_pid', '(\d+)')],                              # process ID
        '%t' => [('t_timestamp', '(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})(?: [A-Z\d]{3,6})?')],                     # timestamp without milliseconds
        '%m' => [('t_mtimestamp', '(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})\.\d+(?: [A-Z\d]{3,6})?')],               # timestamp with milliseconds
        '%l' => [('t_session_line', '(\d+)')],                     # session line number
        '%s' => [('t_session_timestamp', '(\d{4}-\d{2}-\d{2} \d{2}):\d{2}:\d{2}(?: [A-Z\d]{3,6})?')],             # session start timestamp
        '%c' => [('t_session_id', '([0-9a-f\.]*)')],               # session ID
        '%v' => [('t_virtual_xid', '([0-9a-f\.\/]*)')],            # virtual transaction ID
        '%x' => [('t_xid', '([0-9a-f\.\/]*)')],                    # transaction ID
        '%i' => [('t_command', '([0-9a-zA-Z\.\-\_]*)')],           # command tag
        '%e' => [('t_sqlstate', '([0-9a-zA-Z]+)')],                # SQL state
    );

    my @param_list = ();
    $log_line_prefix =~ s/([\[\]\|\(\)\{\}])/\\$1/g;
    $log_line_prefix =~ s/\%l([^\d])\d+/\%l$1\\d\+/;
    while ($log_line_prefix =~ s/(\%[audrhptmlscvxie])/$regex_map{"$1"}->[1]/) {
        push(@param_list, $regex_map{"$1"}->[0]);
    }

    # replace %% by a single %
    $log_line_prefix =~ s/\%\%/\%/;

    return @param_list;
}

# Inclusion of Perl package SQL::Beautify
# Copyright (C) 2009 by Jonas Kramer
# Published under the terms of the Artistic License 2.0.
{
    package SQL::Beautify;
    use strict;
    use warnings;

    our $VERSION = 0.04;

    use Carp;

# Keywords from SQL-92, SQL-99 and SQL-2003.
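# They are compared case-insensitively by _is_keyword(), which drives the optional
# uc_keywords upper-casing and the keyword wrapping performed in _add_token().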
use constant KEYWORDS => qw( ABSOLUTE ACTION ADD AFTER ALL ALLOCATE ALTER AND ANY ARE ARRAY AS ASC ASENSITIVE ASSERTION ASYMMETRIC AT ATOMIC AUTHORIZATION AVG BEFORE BEGIN BETWEEN BIGINT BINARY BIT BIT_LENGTH BLOB BOOLEAN BOTH BREADTH BY CALL CALLED CASCADE CASCADED CASE CAST CATALOG CHAR CHARACTER CHARACTER_LENGTH CHAR_LENGTH CHECK CLOB CLOSE COALESCE COLLATE COLLATION COLUMN COMMIT CONDITION CONNECT CONNECTION CONSTRAINT CONSTRAINTS CONSTRUCTOR CONTAINS CONTINUE CONVERT CORRESPONDING COUNT CREATE CROSS CUBE CURRENT CURRENT_DATE CURRENT_DEFAULT_TRANSFORM_GROUP CURRENT_PATH CURRENT_ROLE CURRENT_TIME CURRENT_TIMESTAMP CURRENT_TRANSFORM_GROUP_FOR_TYPE CURRENT_USER CURSOR CYCLE DATA DATE DAY DEALLOCATE DEC DECIMAL DECLARE DEFAULT DEFERRABLE DEFERRED DELETE DEPTH DEREF DESC DESCRIBE DESCRIPTOR DETERMINISTIC DIAGNOSTICS DISCONNECT DISTINCT DO DOMAIN DOUBLE DROP DYNAMIC EACH ELEMENT ELSE ELSEIF END EPOCH EQUALS ESCAPE EXCEPT EXCEPTION EXEC EXECUTE EXISTS EXIT EXTERNAL EXTRACT FALSE FETCH FILTER FIRST FLOAT FOR FOREIGN FOUND FREE FROM FULL FUNCTION GENERAL GET GLOBAL GO GOTO GRANT GROUP GROUPING HANDLER HAVING HOLD HOUR IDENTITY IF IMMEDIATE IN INDICATOR INITIALLY INNER INOUT INPUT INSENSITIVE INSERT INT INTEGER INTERSECT INTERVAL INTO IS ISOLATION ITERATE JOIN KEY LANGUAGE LARGE LAST LATERAL LEADING LEAVE LEFT LEVEL LIKE LIMIT LOCAL LOCALTIME LOCALTIMESTAMP LOCATOR LOOP LOWER MAP MATCH MAX MEMBER MERGE METHOD MIN MINUTE MODIFIES MODULE MONTH MULTISET NAMES NATIONAL NATURAL NCHAR NCLOB NEW NEXT NO NONE NOT NULL NULLIF NUMERIC OBJECT OCTET_LENGTH OF OLD ON ONLY OPEN OPTION OR ORDER ORDINALITY OUT OUTER OUTPUT OVER OVERLAPS PAD PARAMETER PARTIAL PARTITION PATH POSITION PRECISION PREPARE PRESERVE PRIMARY PRIOR PRIVILEGES PROCEDURE PUBLIC RANGE READ READS REAL RECURSIVE REF REFERENCES REFERENCING RELATIVE RELEASE REPEAT RESIGNAL RESTRICT RESULT RETURN RETURNS REVOKE RIGHT ROLE ROLLBACK ROLLUP ROUTINE ROW ROWS SAVEPOINT SCHEMA SCOPE SCROLL SEARCH SECOND SECTION SELECT SENSITIVE SESSION SESSION_USER SET SETS SIGNAL SIMILAR SIZE SMALLINT SOME SPACE SPECIFIC SPECIFICTYPE SQL SQLCODE SQLERROR SQLEXCEPTION SQLSTATE SQLWARNING START STATE STATIC SUBMULTISET SUBSTRING SUM SYMMETRIC SYSTEM SYSTEM_USER TABLE TABLESAMPLE TEMPORARY TEXT THEN TIME TIMESTAMP TIMEZONE_HOUR TIMEZONE_MINUTE TINYINT TO TRAILING TRANSACTION TRANSLATE TRANSLATION TREAT TRIGGER TRIM TRUE UNDER UNDO UNION UNIQUE UNKNOWN UNNEST UNTIL UPDATE UPPER USAGE USER USING VALUE VALUES VARCHAR VARYING VIEW WHEN WHENEVER WHERE WHILE WINDOW WITH WITHIN WITHOUT WORK WRITE YEAR ZONE ); sub tokenize_sql { my ($query, $remove_white_tokens) = @_; my $re = qr{ ( (?:--|\#)[\ \t\S]* # single line comments | (?:<>|<=>|>=|<=|==|=|!=|!|<<|>>|<|>|\|\||\||&&|&|-|\+|\*(?!/)|/(?!\*)|\%|~|\^|\?) # operators and tests | [\[\]\(\),;.] # punctuation (parenthesis, comma) | \'\'(?!\') # empty single quoted string | \"\"(?!\"") # empty double quoted string | "(?>(?:(?>[^"\\]+)|""|\\.)*)+" # anything inside double quotes, ungreedy | `(?>(?:(?>[^`\\]+)|``|\\.)*)+` # anything inside backticks quotes, ungreedy | '(?>(?:(?>[^'\\]+)|''|\\.)*)+' # anything inside single quotes, ungreedy. | /\*[\ \t\r\n\S]*?\*/ # C style comments | (?:[\w:@]+(?:\.(?:\w+|\*)?)*) # words, standard named placeholders, db.table.*, db.* | (?: \$_\$ | \$\d+ | \${1,2} ) # dollar expressions - eg $_$ $3 $$ | \n # newline | [\t\ ]+ # any kind of white spaces ) }smx; my @query = (); @query = $query =~ m{$re}smxg; if ($remove_white_tokens) { @query = grep(!/^[\s\n\r]*$/, @query); } return wantarray ? 
@query : \@query; } sub new { my ($class, %options) = @_; my $self = bless {%options}, $class; # Set some defaults. $self->{query} = '' unless defined($self->{query}); $self->{spaces} = 4 unless defined($self->{spaces}); $self->{space} = ' ' unless defined($self->{space}); $self->{break} = "\n" unless defined($self->{break}); $self->{wrap} = {} unless defined($self->{wrap}); $self->{keywords} = [] unless defined($self->{keywords}); $self->{rules} = {} unless defined($self->{rules}); $self->{uc_keywords} = 0 unless defined $self->{uc_keywords}; push @{$self->{keywords}}, KEYWORDS; # Initialize internal stuff. $self->{_level} = 0; return $self; } # Add more SQL. sub add { my ($self, $addendum) = @_; $addendum =~ s/^\s*/ /; $self->{query} .= $addendum; } # Set SQL to beautify. sub query { my ($self, $query) = @_; $self->{query} = $query if (defined($query)); return $self->{query}; } # Beautify SQL. sub beautify { my ($self) = @_; $self->{_output} = ''; $self->{_level_stack} = []; $self->{_new_line} = 1; my $last = ''; $self->{_tokens} = [tokenize_sql($self->query, 1)]; while (defined(my $token = $self->_token)) { my $rule = $self->_get_rule($token); # Allow custom rules to override defaults. if ($rule) { $self->_process_rule($rule, $token); } elsif ($token eq '(') { $self->_add_token($token); $self->_new_line; push @{$self->{_level_stack}}, $self->{_level}; $self->_over unless $last and uc($last) eq 'WHERE'; } elsif ($token eq ')') { # $self->_new_line; $self->{_level} = pop(@{$self->{_level_stack}}) || 0; $self->_add_token($token); $self->_new_line if ($self->_next_token and $self->_next_token !~ /^AS$/i and $self->_next_token ne ')' and $self->_next_token !~ /::/ and $self->_next_token ne ';' ); } elsif ($token eq ',') { $self->_add_token($token); $self->_new_line; } elsif ($token eq ';') { $self->_add_token($token); $self->_new_line; # End of statement; remove all indentation. 
@{$self->{_level_stack}} = (); $self->{_level} = 0; } elsif ($token =~ /^(?:SELECT|FROM|WHERE|HAVING|BEGIN|SET)$/i) { $self->_back if ($last and $last ne '(' and $last ne 'FOR'); $self->_new_line; $self->_add_token($token); $self->_new_line if ((($token ne 'SET') || $last) and $self->_next_token and $self->_next_token ne '(' and $self->_next_token ne ';'); $self->_over; } elsif ($token =~ /^(?:GROUP|ORDER|LIMIT)$/i) { $self->_back; $self->_new_line; $self->_add_token($token); } elsif ($token =~ /^(?:BY)$/i) { $self->_add_token($token); $self->_new_line; $self->_over; } elsif ($token =~ /^(?:CASE)$/i) { $self->_add_token($token); $self->_over; } elsif ($token =~ /^(?:WHEN)$/i) { $self->_new_line; $self->_add_token($token); } elsif ($token =~ /^(?:ELSE)$/i) { $self->_new_line; $self->_add_token($token); } elsif ($token =~ /^(?:END)$/i) { $self->_back; $self->_new_line; $self->_add_token($token); } elsif ($token =~ /^(?:UNION|INTERSECT|EXCEPT)$/i) { $self->_back unless $last and $last eq '('; $self->_new_line; $self->_add_token($token); $self->_new_line if ($self->_next_token and $self->_next_token ne '('); $self->_over; } elsif ($token =~ /^(?:LEFT|RIGHT|INNER|OUTER|CROSS)$/i) { $self->_back; $self->_new_line; $self->_add_token($token); $self->_over; } elsif ($token =~ /^(?:JOIN)$/i) { if ($last and $last !~ /^(?:LEFT|RIGHT|INNER|OUTER|CROSS)$/) { $self->_new_line; } $self->_add_token($token); } elsif ($token =~ /^(?:AND|OR)$/i) { $self->_new_line; $self->_add_token($token); # $self->_new_line; } elsif ($token =~ /^--/) { if (!$self->{no_comments}) { $self->_add_token($token); $self->_new_line; } } elsif ($token =~ /^\/\*.*\*\/$/s) { if (!$self->{no_comments}) { $token =~ s/\n[\s\t]+\*/\n\*/gs; $self->_new_line; $self->_add_token($token); $self->_new_line; } } else { $self->_add_token($token, $last); } $last = $token; } $self->_new_line; $self->{_output}; } # Add a token to the beautified string. sub _add_token { my ($self, $token, $last_token) = @_; if ($self->{wrap}) { my $wrap; if ($self->_is_keyword($token)) { $wrap = $self->{wrap}->{keywords}; } elsif ($self->_is_constant($token)) { $wrap = $self->{wrap}->{constants}; } if ($wrap) { $token = $wrap->[0] . $token . $wrap->[1]; } } my $last_is_dot = defined($last_token) && $last_token eq '.'; if (!$self->_is_punctuation($token) and !$last_is_dot) { $self->{_output} .= $self->_indent; } # uppercase keywords $token = uc $token if $self->_is_keyword($token) and $self->{uc_keywords}; $self->{_output} .= $token; # This can't be the beginning of a new line anymore. $self->{_new_line} = 0; } # Increase the indentation level. sub _over { my ($self) = @_; ++$self->{_level}; } # Decrease the indentation level. sub _back { my ($self) = @_; --$self->{_level} if ($self->{_level} > 0); } # Return a string of spaces according to the current indentation level and the # spaces setting for indenting. sub _indent { my ($self) = @_; if ($self->{_new_line}) { return $self->{space} x ($self->{spaces} * $self->{_level}); } else { return $self->{space}; } } # Add a line break, but make sure there are no empty lines. sub _new_line { my ($self) = @_; $self->{_output} .= $self->{break} unless ($self->{_new_line}); $self->{_new_line} = 1; } # Have a look at the token that's coming up next. sub _next_token { my ($self) = @_; return @{$self->{_tokens}} ? $self->{_tokens}->[0] : undef; } # Get the next token, removing it from the list of remaining tokens. sub _token { my ($self) = @_; return shift @{$self->{_tokens}}; } # Check if a token is a known SQL keyword. 
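# The leading '~~' is a double bitwise negation that forces grep into scalar
# context, so the method returns the number of matching keywords (0 when none).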
sub _is_keyword { my ($self, $token) = @_; return ~~ grep {$_ eq uc($token)} @{$self->{keywords}}; } # Add new keywords to highlight. sub add_keywords { my $self = shift; for my $keyword (@_) { push @{$self->{keywords}}, ref($keyword) ? @{$keyword} : $keyword; } } # Add new rules. sub add_rule { my ($self, $format, $token) = @_; my $rules = $self->{rules} ||= {}; my $group = $rules->{$format} ||= []; push @{$group}, ref($token) ? @{$token} : $token; } # Find custom rule for a token. sub _get_rule { my ($self, $token) = @_; values %{$self->{rules}}; # Reset iterator. while (my ($rule, $list) = each %{$self->{rules}}) { return $rule if (grep {uc($token) eq uc($_)} @$list); } return undef; } sub _process_rule { my ($self, $rule, $token) = @_; my $format = { break => sub {$self->_new_line}, over => sub {$self->_over}, back => sub {$self->_back}, token => sub {$self->_add_token($token)}, push => sub {push @{$self->{_level_stack}}, $self->{_level}}, pop => sub {$self->{_level} = pop(@{$self->{_level_stack}}) || 0}, reset => sub {$self->{_level} = 0; @{$self->{_level_stack}} = ();}, }; for (split /-/, lc $rule) { &{$format->{$_}} if ($format->{$_}); } } # Check if a token is a constant. sub _is_constant { my ($self, $token) = @_; return ($token =~ /^\d+$/ or $token =~ /^(['"`]).*\1$/); } # Check if a token is punctuation. sub _is_punctuation { my ($self, $token) = @_; return ($token =~ /^[,;.]$/); } } sub get_log_file { my $logf = shift; my $lfile = undef; # get file size my $totalsize = (stat("$logf"))[7] || 0; # Open a file handle if ($logf !~ /\.(gz|bz2|zip)/i) { open($lfile, $logf) || die "FATAL: cannot read log file $logf. $!\n"; $totalsize = 0 if ($lfile eq '-'); } else { my $uncompress = $zcat; if (($logf =~ /\.bz2/i) && ($zcat =~ /^$zcat_cmd$/)) { $uncompress = $bzcat; } elsif (($logf =~ /\.zip/i) && ($zcat =~ /^$zcat_cmd$/)) { $uncompress = $ucat; } &logmsg('DEBUG', "Compressed log file, will use command: $uncompress \"$logf\""); # Open a pipe to zcat program for compressed log open($lfile,"$uncompress \"$logf\" |") || die "FATAL: cannot read from pipe to $uncompress \"$logf\". $!\n"; # Real size of the file is unknown, try to find it # bz2 does not report real size $totalsize = 0; if ($logf =~ /\.(gz|zip)/i) { my $cmd_file_size = $gzip_uncompress_size; if ($logf =~ /\.zip/i) { $cmd_file_size = $zip_uncompress_size; } $cmd_file_size =~ s/\%f/$logf/g; $totalsize = `$cmd_file_size`; chomp($totalsize); } if ($queue_size) { $job_per_file = $queue_size; $queue_size = 0; } } # In list context returns the filehandle and the size of the file if (wantarray()) { return ($lfile, $totalsize); } # In scalar context return size only close($lfile); return $totalsize; } sub split_logfile { my $logf = shift; # CSV file can't be parsed using multiprocessing return (0, -1) if ( $format eq 'csv' ); # get file size my $totalsize = (stat("$logf"))[7] || 0; # Real size of the file is unknown, try to find it # bz2 does not report real size if ($logf =~ /\.(gz|zip)/i) { $totalsize = 0; my $cmd_file_size = $gzip_uncompress_size; if ($logf =~ /\.zip/i) { $cmd_file_size = $zip_uncompress_size; } $cmd_file_size =~ s/\%f/$logf/g; $totalsize = `$cmd_file_size`; chomp($totalsize); if ($queue_size) { $job_per_file = $queue_size; $queue_size = 0; } } elsif ($logf =~ /\.bz2/i) { $totalsize = 0; } return (0, -1) if (!$totalsize); my @chunks = (0); my $i = 1; while ($i < $queue_size) { push(@chunks, int(($totalsize/$queue_size) * $i)); $i++; } push(@chunks, $totalsize); return @chunks; } __DATA__