pgbadger-3.3/Makefile.PL

use ExtUtils::MakeMaker;
# See lib/ExtUtils/MakeMaker.pm for details of how to influence
# the contents of the Makefile that is written.
use strict;

my @ALLOWED_ARGS = ('INSTALLDIRS','DESTDIR');

# Parse command line arguments and store them as environment variables
while ($_ = shift) {
    my ($k,$v) = split(/=/, $_, 2);
    if (grep(/^$k$/, @ALLOWED_ARGS)) {
        $ENV{$k} = $v;
    }
}
$ENV{DESTDIR} =~ s/\/$// if defined $ENV{DESTDIR};

# Default install path
my $DESTDIR = $ENV{DESTDIR} || '';
my $INSTALLDIRS = $ENV{INSTALLDIRS} || 'site';
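# Example invocation (illustrative values only, not part of the distribution):
#   perl Makefile.PL INSTALLDIRS=vendor DESTDIR=/tmp/stage
# The loop above exports both values as environment variables, and the
# defaults computed here are then passed to WriteMakefile() below.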
WriteMakefile(
    'DISTNAME'     => 'pgbadger',
    'NAME'         => 'pgBadger',
    'VERSION_FROM' => 'pgbadger',
    'dist'         => {
        'COMPRESS' => 'gzip -9f', 'SUFFIX' => 'gz',
        'ZIP'      => '/usr/bin/zip', 'ZIPFLAGS' => '-rl'
    },
    'AUTHOR'      => 'Gilles Darold (gilles@darold.net)',
    'ABSTRACT'    => 'pgBadger - PostgreSQL log analysis report',
    'EXE_FILES'   => [ qw(pgbadger) ],
    'MAN1PODS'    => { 'doc/pgBadger.pod' => 'blib/man1/pgbadger.1' },
    'DESTDIR'     => $DESTDIR,
    'INSTALLDIRS' => $INSTALLDIRS,
    'clean'       => {},
    'META_MERGE'  => {
        resources => {
            homepage   => 'http://projects.dalibo.org/pgbadger',
            repository => {
                type => 'git',
                git  => 'git@github.com:dalibo/pgbadger.git',
                web  => 'https://github.com/dalibo/pgbadger',
            },
        },
    }
);
pgbadger-3.3/README

NAME
pgBadger - a fast PostgreSQL log analysis report
SYNOPSIS
pgbadger [options] logfile [...]
PostgreSQL log analyzer with fully detailed reports and charts.
Arguments:
logfile can be a single log file, a list of files, or a shell command
returning a list of files. If you want to pass log content from stdin
use - as filename. Note that input from stdin will not work with csvlog.
Options:
-a | --average minutes : number of minutes to build the average graphs of
queries and connections.
-b | --begin datetime : start date/time for the data to be parsed in log.
-c | --dbclient host : only report on entries for the given client host.
-C | --nocomment : remove comments like /* ... */ from queries.
-d | --dbname database : only report on entries for the given database.
-e | --end datetime : end date/time for the data to be parsed in log.
-f | --format logtype : possible values: syslog,stderr,csv. Default: stderr
-G | --nograph : disable graphs on HTML output. Enable by default.
-h | --help : show this message and exit.
-i | --ident name : programname used as syslog ident. Default: postgres
-j | --jobs number : number of jobs to run in parallel on each log file.
Default is 1, run as a single process.
-J | --Jobs number : number of log files to parse in parallel. Default
is 1, run as a single process.
-l | --last-parsed file: allow incremental log parsing by registering the
last datetime and line parsed. Useful if you want
to watch errors since last run or if you want one
report per day with a log rotated each week.
-m | --maxlength size : maximum length of a query, it will be restricted to
the given size. Default: no truncate
-n | --nohighlight : disable SQL code highlighting.
-N | --appname name : only report on entries for given application name
-o | --outfile filename: define the filename for output. Default depends on
the output format: out.html, out.txt or out.tsung.
To dump output to stdout use - as filename.
-p | --prefix string : give here the value of your custom log_line_prefix
defined in your postgresql.conf. Only use it if you
aren't using one of the standard prefixes specified
in the pgBadger documentation, such as if your prefix
includes additional variables like client ip or
application name. See examples below.
-P | --no-prettify : disable SQL queries prettify formatter.
-q | --quiet : don't print anything to stdout, not even a progress bar.
-s | --sample number : number of query samples to store/display. Default: 3
-S | --select-only : use it if you want to report select queries only.
-t | --top number : number of queries to store/display. Default: 20
-T | --title string : change title of the HTML page report.
-u | --dbuser username : only report on entries for the given user.
-U | --exclude-user username : exclude entries for the specified user from report.
-v | --verbose : enable verbose or debug mode. Disabled by default.
-V | --version : show pgBadger version and exit.
-w | --watch-mode : only report errors just like logwatch could do.
-x | --extension : output format. Values: text, html or tsung. Default: html
-z | --zcat exec_path : set the full path to the zcat program. Use it if
zcat or bzcat or unzip is not on your path.
--pie-limit num : pie data lower than num% will show a sum instead.
--exclude-query regex : any query matching the given regex will be excluded
from the report. For example: "^(VACUUM|COMMIT)"
You can use this option multiple times.
--exclude-file filename: path of the file which contains all the regex to use
to exclude queries from the report. One regex per line.
--include-query regex : any query that does not match the given regex will be
excluded from the report. For example: "(table_1|table_2)"
You can use this option multiple times.
--include-file filename: path of the file which contains all the regex of the
queries to include from the report. One regex per line.
--disable-error : do not generate error report.
--disable-hourly : do not generate hourly report.
--disable-type : do not generate query type report.
--disable-query : do not generate query reports (slowest, most
frequent, ...).
--disable-session : do not generate session report.
--disable-connection : do not generate connection report.
--disable-lock : do not generate lock report.
--disable-temporary : do not generate temporary report.
--disable-checkpoint : do not generate checkpoint report.
--disable-autovacuum : do not generate autovacuum report.
Examples:
pgbadger /var/log/postgresql.log
pgbadger /var/log/postgres.log.2.gz /var/log/postgres.log.1.gz /var/log/postgres.log
pgbadger /var/log/postgresql/postgresql-2012-05-*
pgbadger --exclude-query="^(COPY|COMMIT)" /var/log/postgresql.log
pgbadger -b "2012-06-25 10:56:11" -e "2012-06-25 10:59:11" /var/log/postgresql.log
cat /var/log/postgres.log | pgbadger -
# log prefix with stderr log output
perl pgbadger --prefix '%t [%p]: [%l-1] user=%u,db=%d,client=%h' \
/pglog/postgresql-2012-08-21*
perl pgbadger --prefix '%m %u@%d %p %r %a : ' /pglog/postgresql.log
# Log line prefix with syslog log output
perl pgbadger --prefix 'user=%u,db=%d,client=%h,appname=%a' \
/pglog/postgresql-2012-08-21*
Use my 8 CPUs to parse my 10GB file faster, really faster
perl pgbadger -j 8 /pglog/postgresql-9.1-main.log
Generate Tsung sessions XML file with select queries only:
perl pgbadger -S -o sessions.tsung --prefix '%t [%p]: [%l-1] user=%u,db=%d ' /pglog/postgresql-9.1.log
Reporting errors every week by cron job:
30 23 * * 1 /usr/bin/pgbadger -q -w /var/log/postgresql.log -o /var/reports/pg_errors.html
Generate report every week using incremental behavior:
0 4 * * 1 /usr/bin/pgbadger -q `find /var/log/ -mtime -7 -name "postgresql.log*"` \
-o /var/reports/pg_errors-`date +%F`.html -l /var/reports/pgbadger_incremental_file.dat
This supposes that your log file and HTML report are also rotated every
week.
DESCRIPTION
pgBadger is a PostgreSQL log analyzer built for speed, producing fully
detailed reports from your PostgreSQL log file. It's a single, small
Perl script that aims to replace and outperform the old PHP script
pgFouine.
By the way, we would like to thank Guillaume Smet for all the work he
has done on this really nice tool. We've been using it for a long time;
it is a really great tool!
pgBadger is written in pure Perl. It uses a Javascript library to draw
graphs, so there are no additional Perl modules or other packages to
install. Furthermore, this library gives us additional features, such
as zooming.
pgBadger is able to autodetect your log file format (syslog, stderr or
csvlog). It is designed to parse huge log files, as well as gzip, zip or
bzip2 compressed files. See a complete list of features below.
FEATURE
pgBadger reports everything about your SQL queries:
Overall statistics.
The most frequent waiting queries.
Queries that waited the most.
Queries generating the most temporary files.
Queries generating the largest temporary files.
The slowest queries.
Queries that took up the most time.
The most frequent queries.
The most frequent errors.
The following reports are also available with hourly charts:
Hourly queries statistics.
Hourly temporary file statistics.
Hourly checkpoints statistics.
Hourly restartpoints statistics.
Locks statistics.
Queries by type (select/insert/update/delete).
Distribution of query types per database/application.
Sessions per database/user/client.
Connections per database/user/client.
Autovacuum and autoanalyze per table.
All charts are zoomable and can be saved as PNG images. SQL queries
reported are highlighted and beautified automatically.
REQUIREMENT
pgBadger comes as a single Perl script - you do not need anything other
than a modern Perl distribution. Charts are rendered using a Javascript
library, so you don't need anything else. Your browser will do all the work.
If you plan to parse PostgreSQL CSV log files you might need some
Perl modules:
Text::CSV_XS - to parse PostgreSQL CSV log files.
This module is optional; if your PostgreSQL logs are not in CSV
format you don't need to install it.
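To check whether the module is already installed, a quick generic Perl
one-liner (not a pgBadger command) is enough:

    perl -MText::CSV_XS -e 'print "Text::CSV_XS $Text::CSV_XS::VERSION\n"'

If the module is missing, Perl will report that it cannot locate
Text/CSV_XS.pm.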
Compressed log file formats are autodetected from the file extension. If
pgBadger finds a gz extension it will use the zcat utility, with a bz2
extension it will use bzcat, and if the file extension is zip then the
unzip utility will be used.
If those utilities are not found in the PATH environment variable then
use the --zcat command line option to change this path. For example:
--zcat="/usr/local/bin/gunzip -c" or --zcat="/usr/local/bin/bzip2 -dc"
--zcat="C:\tools\unzip -p"
By default pgBadger will use the zcat, bzcat and unzip utilities
according to the file extension. If you use the default compression
format autodetection you can mix gz, bz2 or zip files. Specifying a
custom value for the --zcat option disables this support for mixed
compression formats.
Note that multiprocessing cannot be used with compressed files or CSV
files, nor on the Windows platform.
POSTGRESQL CONFIGURATION
You must enable and set some configuration directives in your
postgresql.conf before starting.
You must first enable SQL query logging to have something to parse:
log_min_duration_statement = 0
With this setting every statement will be logged; on a busy server you
may want to increase the value so that only queries with a longer
duration are logged. Note that if you have log_statement set to 'all'
nothing will be logged with log_line_prefix. See next chapter for more
information.
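For example (the threshold below is purely illustrative), a busy server
could use:

    log_min_duration_statement = 250

so that only queries running longer than 250 milliseconds are logged.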
With 'stderr' log format, log_line_prefix must be at least:
log_line_prefix = '%t [%p]: [%l-1] '
Log line prefix could add user and database name as follows:
log_line_prefix = '%t [%p]: [%l-1] user=%u,db=%d '
or for syslog log file format:
log_line_prefix = 'user=%u,db=%d '
Log line prefix for stderr output could also be:
log_line_prefix = '%t [%p]: [%l-1] db=%d,user=%u '
or for syslog output:
log_line_prefix = 'db=%d,user=%u '
You need to enable other parameters in postgresql.conf to get more
information from your log files:
log_checkpoints = on
log_connections = on
log_disconnections = on
log_lock_waits = on
log_temp_files = 0
Do not enable log_statement, as its log format will not be parsed by
pgBadger.
Of course your log messages should be in English without locale support:
lc_messages='C'
but this is not only a pgBadger recommendation.
log_min_duration_statement, log_duration and log_statement
If you want full statistics reports you must set
log_min_duration_statement to 0 or more milliseconds.
If you just want to report duration and number of queries and don't want
all details about queries, set log_min_duration_statement to -1 to
disable it and enable log_duration in your postgresql.conf file. If you
want to add the most common request report you can either choose to set
log_min_duration_statement to a higher value or choose to enable
log_statement.
Enabling log_min_duration_statement will add reports about slowest
queries and queries that took up the most time. Take care that if you
have log_statement set to 'all' nothing will be logged with
log_line_prefix.
Parallel processing
To enable parallel processing you just have to use the -j N option where
N is the number of cores you want to use.
pgbadger will then proceed as follows:
for each log file
chunk size = int(file size / N)
look at start/end offsets of these chunks
fork N processes and seek to the start offset of each chunk
each process will terminate when the parser reaches the end offset
of its chunk
each process writes stats into a binary temporary file
wait for all children to terminate
All binary temporary files generated will then be read and loaded into
memory to build the html output.
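The following is a minimal, hypothetical Perl sketch of this chunking
approach (it is not pgBadger's actual code; the file name and job count
are placeholders): it splits one file into N byte ranges, forks one
worker per range, and has each worker seek to its start offset and stop
once it passes its end offset.

    #!/usr/bin/perl
    use strict;
    use warnings;

    # Minimal sketch of byte-range chunking with one forked worker per chunk.
    my $file = shift || 'postgresql.log';   # placeholder file name
    my $jobs = shift || 4;                  # placeholder job count (-j N)
    my $size = -s $file or die "empty or missing file: $file\n";
    my $chunk = int($size / $jobs);

    my @pids;
    for my $i (0 .. $jobs - 1) {
        my $start = $i * $chunk;
        my $end   = ($i == $jobs - 1) ? $size : ($i + 1) * $chunk;
        my $pid   = fork();
        die "fork failed: $!\n" unless defined $pid;
        if ($pid == 0) {
            # Child: seek to the start offset and process lines up to the end offset.
            open my $fh, '<', $file or die "cannot open $file: $!\n";
            seek($fh, $start, 0);
            <$fh> if $start > 0;            # skip the possibly partial first line
            my $count = 0;
            while (<$fh>) {
                $count++;                   # a real worker would parse the line here
                last if tell($fh) >= $end;
            }
            close $fh;
            print "chunk $i (bytes $start-$end): $count lines\n";
            exit 0;
        }
        push @pids, $pid;
    }
    waitpid($_, 0) for @pids;               # wait for all children to terminate

A real worker would accumulate per-chunk statistics and dump them to a
binary temporary file, as described above, instead of printing a line
count.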
With that method, at the start/end of chunks pgbadger may truncate or
omit a maximum of N queries per log file, which is an insignificant gap
if you have millions of queries in your log file. The chance that the
query you were looking for is lost is close to zero, which is why this
gap is acceptable. Most of the time the query is counted twice but
truncated.
When you have lots of small log files and lots of CPUs, it is faster to
dedicate one core to one log file at a time. To enable this behavior you
have to use option -J N instead. With 200 log files of 10MB each, the
use of the -J option starts being really interesting with 8 cores. Using
this method you can be sure not to lose any queries in the reports.
Here is a benchmark done on a server with 8 CPUs and a single file of
9.5GB.
Option | 1 CPU | 2 CPU | 4 CPU | 8 CPU
--------+---------+-------+-------+------
-j | 1h41m18 | 50m25 | 25m39 | 15m58
-J | 1h41m18 | 54m28 | 41m16 | 34m45
With 200 log files of 10MB each and a total of 2GB the results are
slightly different:
Option | 1 CPU | 2 CPU | 4 CPU | 8 CPU
--------+-------+-------+-------+------
-j | 20m15 | 9m56 | 5m20 | 4m20
-J | 20m15 | 9m49 | 5m00 | 2m40
So it is recommended to use -j unless you have hundreds of small log
files and can use at least 8 CPUs.
IMPORTANT: when you are using parallel parsing pgbadger will generate a
lot of temporary files in the /tmp directory and will remove them at the
end, so do not remove those files while pgbadger is running. They are
all named using the template tmp_pgbadgerXXXX.bin so they can be easily
identified.
INSTALLATION
Download the tarball from github and unpack the archive as follows:
tar xzf pgbadger-3.x.tar.gz
cd pgbadger-3.x/
perl Makefile.PL
make && sudo make install
This will copy the Perl script pgbadger to /usr/local/bin/pgbadger by
default and the man page into /usr/local/share/man/man1/pgbadger.1.
Those are the default installation directories for 'site' install.
If you want to install all under /usr/ location, use INSTALLDIRS='perl'
as an argument of Makefile.PL. The script will be installed into
/usr/bin/pgbadger and the manpage into /usr/share/man/man1/pgbadger.1.
For example, to install everything just like Debian does, proceed as
follows:
perl Makefile.PL INSTALLDIRS=vendor
By default INSTALLDIRS is set to site.
AUTHORS
pgBadger is an original work from Gilles Darold. It is maintained by the
good folk at Dalibo and everyone who wants to contribute.
LICENSE
pgBadger is free software distributed under the PostgreSQL Licence.
Copyright (c) 2012-2013, Dalibo
A modified version of the SQL::Beautify Perl Module is embedded in
pgBadger with copyright (C) 2009 by Jonas Kramer and is published under
the terms of the Artistic License 2.0.
pgbadger-3.3/.perltidyrc

--backup-and-modify-in-place
--backup-file-extension=beforeTidy
--block-brace-tightness=2
--brace-tightness=2
--closing-token-indentation=1
--continuation-indentation=4
--indent-columns=4
--maximum-line-length=134
--cuddled-else
--opening-sub-brace-on-new-line
--noopening-brace-on-new-line
--nooutdent-labels
--paren-tightness=2
--square-bracket-tightness=2
--vertical-tightness=0
--vertical-tightness-closing=0
--break-at-old-comma-breakpoints
--entab-leading-whitespace=4
--tabs
pgbadger-3.3/MANIFEST

LICENSE
Makefile.PL
MANIFEST
META.yml
pgbadger
README
doc/pgBadger.pod
ChangeLog
pgbadger-3.3/LICENSE

Copyright (c) 2012-2013, Dalibo
Permission to use, copy, modify, and distribute this software and its
documentation for any purpose, without fee, and without a written agreement
is hereby granted, provided that the above copyright notice and this
paragraph and the following two paragraphs appear in all copies.
IN NO EVENT SHALL Dalibo BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS,
ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF
Dalibo HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Dalibo SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND Dalibo
HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS,
OR MODIFICATIONS.
pgbadger-3.3/doc/pgBadger.pod

=head1 NAME
pgBadger - a fast PostgreSQL log analysis report
=head1 SYNOPSIS
pgbadger [options] logfile [...]
PostgreSQL log analyzer with fully detailed reports and charts.
Arguments:
logfile can be a single log file, a list of files, or a shell command
returning a list of files. If you want to pass log content from stdin
use - as filename. Note that input from stdin will not work with csvlog.
Options:
-a | --average minutes : number of minutes to build the average graphs of
queries and connections.
-b | --begin datetime : start date/time for the data to be parsed in log.
-c | --dbclient host : only report on entries for the given client host.
-C | --nocomment : remove comments like /* ... */ from queries.
-d | --dbname database : only report on entries for the given database.
-e | --end datetime : end date/time for the data to be parsed in log.
-f | --format logtype : possible values: syslog,stderr,csv. Default: stderr
-G | --nograph : disable graphs on HTML output. Enable by default.
-h | --help : show this message and exit.
-i | --ident name : programname used as syslog ident. Default: postgres
-j | --jobs number : number of jobs to run in parallel on each log file.
Default is 1, run as a single process.
-J | --Jobs number : number of log files to parse in parallel. Default
is 1, run as a single process.
-l | --last-parsed file: allow incremental log parsing by registering the
last datetime and line parsed. Useful if you want
to watch errors since last run or if you want one
report per day with a log rotated each week.
-m | --maxlength size : maximum length of a query, it will be restricted to
the given size. Default: no truncate
-n | --nohighlight : disable SQL code highlighting.
-N | --appname name : only report on entries for given application name
-o | --outfile filename: define the filename for output. Default depends on
the output format: out.html, out.txt or out.tsung.
To dump output to stdout use - as filename.
-p | --prefix string : give here the value of your custom log_line_prefix
defined in your postgresql.conf. Only use it if you
aren't using one of the standard prefixes specified
in the pgBadger documentation, such as if your prefix
includes additional variables like client ip or
application name. See examples below.
-P | --no-prettify : disable SQL queries prettify formatter.
-q | --quiet : don't print anything to stdout, not even a progress bar.
-s | --sample number : number of query samples to store/display. Default: 3
-S | --select-only : use it if you want to report select queries only.
-t | --top number : number of queries to store/display. Default: 20
-T | --title string : change title of the HTML page report.
-u | --dbuser username : only report on entries for the given user.
-U | --exclude-user username : exclude entries for the specified user from report.
-v | --verbose : enable verbose or debug mode. Disabled by default.
-V | --version : show pgBadger version and exit.
-w | --watch-mode : only report errors just like logwatch could do.
-x | --extension : output format. Values: text, html or tsung. Default: html
-z | --zcat exec_path : set the full path to the zcat program. Use it if
zcat or bzcat or unzip is not on your path.
--pie-limit num : pie data lower than num% will show a sum instead.
--exclude-query regex : any query matching the given regex will be excluded
from the report. For example: "^(VACUUM|COMMIT)"
You can use this option multiple times.
--exclude-file filename: path of the file which contains all the regex to use
to exclude queries from the report. One regex per line.
--include-query regex : any query that does not match the given regex will be
excluded from the report. For example: "(table_1|table_2)"
You can use this option multiple times.
--include-file filename: path of the file which contains all the regex of the
queries to include from the report. One regex per line.
--disable-error : do not generate error report.
--disable-hourly : do not generate hourly report.
--disable-type : do not generate query type report.
--disable-query : do not generate query reports (slowest, most
frequent, ...).
--disable-session : do not generate session report.
--disable-connection : do not generate connection report.
--disable-lock : do not generate lock report.
--disable-temporary : do not generate temporary report.
--disable-checkpoint : do not generate checkpoint report.
--disable-autovacuum : do not generate autovacuum report.
Examples:
pgbadger /var/log/postgresql.log
pgbadger /var/log/postgres.log.2.gz /var/log/postgres.log.1.gz /var/log/postgres.log
pgbadger /var/log/postgresql/postgresql-2012-05-*
pgbadger --exclude-query="^(COPY|COMMIT)" /var/log/postgresql.log
pgbadger -b "2012-06-25 10:56:11" -e "2012-06-25 10:59:11" /var/log/postgresql.log
cat /var/log/postgres.log | pgbadger -
# log prefix with stderr log output
perl pgbadger --prefix '%t [%p]: [%l-1] user=%u,db=%d,client=%h' \
/pglog/postgresql-2012-08-21*
perl pgbadger --prefix '%m %u@%d %p %r %a : ' /pglog/postgresql.log
# Log line prefix with syslog log output
perl pgbadger --prefix 'user=%u,db=%d,client=%h,appname=%a' \
/pglog/postgresql-2012-08-21*
Use my 8 CPUs to parse my 10GB file faster, really faster
perl pgbadger -j 8 /pglog/postgresql-9.1-main.log
Generate Tsung sessions XML file with select queries only:
perl pgbadger -S -o sessions.tsung --prefix '%t [%p]: [%l-1] user=%u,db=%d ' /pglog/postgresql-9.1.log
Reporting errors every week by cron job:
30 23 * * 1 /usr/bin/pgbadger -q -w /var/log/postgresql.log -o /var/reports/pg_errors.html
Generate report every week using incremental behavior:
0 4 * * 1 /usr/bin/pgbadger -q `find /var/log/ -mtime -7 -name "postgresql.log*"` \
-o /var/reports/pg_errors-`date +%F`.html -l /var/reports/pgbadger_incremental_file.dat
This supposes that your log file and HTML report are also rotated every week.
=head1 DESCRIPTION
pgBadger is a PostgreSQL log analyzer built for speed, producing fully detailed reports from your PostgreSQL log file. It's a single, small Perl script that aims to replace and outperform the old PHP script pgFouine.
By the way, we would like to thank Guillaume Smet for all the work he has done on this really nice tool. We've been using it for a long time; it is a really great tool!
pgBadger is written in pure Perl. It uses a Javascript library to draw graphs, so there are no additional Perl modules or other packages to install. Furthermore, this library gives us additional features, such as zooming.
pgBadger is able to autodetect your log file format (syslog, stderr or csvlog). It is designed to parse huge log files, as well as gzip, zip or bzip2 compressed files. See a complete list of features below.
=head1 FEATURE
pgBadger reports everything about your SQL queries:
Overall statistics.
The most frequent waiting queries.
Queries that waited the most.
Queries generating the most temporary files.
Queries generating the largest temporary files.
The slowest queries.
Queries that took up the most time.
The most frequent queries.
The most frequent errors.
The following reports are also available with hourly charts:
Hourly queries statistics.
Hourly temporary file statistics.
Hourly checkpoints statistics.
Hourly restartpoints statistics.
Locks statistics.
Queries by type (select/insert/update/delete).
Distribution of query types per database/application.
Sessions per database/user/client.
Connections per database/user/client.
Autovacuum and autoanalyze per table.
All charts are zoomable and can be saved as PNG images. SQL queries reported are highlighted and beautified automatically.
=head1 REQUIREMENT
pgBadger comes as a single Perl script - you do not need anything other than a modern Perl distribution. Charts are rendered using a Javascript library, so you don't need anything else. Your browser will do all the work.
If you plan to parse PostgreSQL CSV log files you might need some Perl modules:
Text::CSV_XS - to parse PostgreSQL CSV log files.
This module is optional; if your PostgreSQL logs are not in CSV format you don't need to install it.
Compressed log file formats are autodetected from the file extension. If pgBadger finds a gz extension
it will use the zcat utility, with a bz2 extension it will use bzcat, and if the file extension is zip
then the unzip utility will be used.
If those utilities are not found in the PATH environment variable then use the --zcat command line option
to change this path. For example:
--zcat="/usr/local/bin/gunzip -c" or --zcat="/usr/local/bin/bzip2 -dc"
--zcat="C:\tools\unzip -p"
By default pgBadger will use the zcat, bzcat and unzip utilities according to the
file extension. If you use the default compression format autodetection you can
mix gz, bz2 or zip files. Specifying a custom value for the --zcat option
disables this support for mixed compression formats.
Note that multiprocessing cannot be used with compressed files or CSV files,
nor on the Windows platform.
=head1 POSTGRESQL CONFIGURATION
You must enable and set some configuration directives in your postgresql.conf
before starting.
You must first enable SQL query logging to have something to parse:
log_min_duration_statement = 0
With this setting every statement will be logged; on a busy server you may want
to increase the value so that only queries with a longer duration are logged.
Note that if you have log_statement set to 'all' nothing will be logged with
log_line_prefix. See next chapter for more information.
With 'stderr' log format, log_line_prefix must be at least:
log_line_prefix = '%t [%p]: [%l-1] '
Log line prefix could add user and database name as follows:
log_line_prefix = '%t [%p]: [%l-1] user=%u,db=%d '
or for syslog log file format:
log_line_prefix = 'user=%u,db=%d '
Log line prefix for stderr output could also be:
log_line_prefix = '%t [%p]: [%l-1] db=%d,user=%u '
or for syslog output:
log_line_prefix = 'db=%d,user=%u '
You need to enable other parameters in postgresql.conf to get more information from your log files:
log_checkpoints = on
log_connections = on
log_disconnections = on
log_lock_waits = on
log_temp_files = 0
Do not enable log_statement, as its log format will not be parsed by pgBadger.
Of course your log messages should be in English without locale support:
lc_messages='C'
but this is not only a pgBadger recommendation.
=head1 log_min_duration_statement, log_duration and log_statement
If you want full statistics reports you must set log_min_duration_statement
to 0 or more milliseconds.
If you just want to report duration and number of queries and don't want all
details about queries, set log_min_duration_statement to -1 to disable it and
enable log_duration in your postgresql.conf file. If you want to add the most
common request report you can either choose to set log_min_duration_statement
to a higher value or choose to enable log_statement.
Enabling log_min_duration_statement will add reports about slowest queries and
queries that took up the most time. Take care that if you have log_statement
set to 'all' nothing will be logged with log_line_prefix.
=head1 Parallel processing
To enable parallel processing you just have to use the -j N option where N is
the number of cores you want to use.
pgbadger will then proceed as follows:
for each log file
chunk size = int(file size / N)
look at start/end offsets of these chunks
fork N processes and seek to the start offset of each chunk
each process will terminate when the parser reaches the end offset
of its chunk
each process writes stats into a binary temporary file
wait for all children to terminate
All binary temporary files generated will then be read and loaded into
memory to build the html output.
With that method, at the start/end of chunks pgbadger may truncate or omit a
maximum of N queries per log file, which is an insignificant gap if you have
millions of queries in your log file. The chance that the query you were
looking for is lost is close to zero, which is why this gap is acceptable. Most
of the time the query is counted twice but truncated.
When you have lots of small log files and lots of CPUs, it is faster to dedicate
one core to one log file at a time. To enable this behavior you have to use
option -J N instead. With 200 log files of 10MB each, the use of the -J option
starts being really interesting with 8 cores. Using this method you can be sure
not to lose any queries in the reports.
Here is a benchmark done on a server with 8 CPUs and a single file of 9.5GB.
Option | 1 CPU | 2 CPU | 4 CPU | 8 CPU
--------+---------+-------+-------+------
-j | 1h41m18 | 50m25 | 25m39 | 15m58
-J | 1h41m18 | 54m28 | 41m16 | 34m45
With 200 log files of 10MB each and a total of 2GB the results are slightly
different:
Option | 1 CPU | 2 CPU | 4 CPU | 8 CPU
--------+-------+-------+-------+------
-j | 20m15 | 9m56 | 5m20 | 4m20
-J | 20m15 | 9m49 | 5m00 | 2m40
So it is recommended to use -j unless you have hundreds of small log files
and can use at least 8 CPUs.
IMPORTANT: when you are using parallel parsing pgbadger will generate a lot
of temporary files in the /tmp directory and will remove them at the end, so
do not remove those files while pgbadger is running. They are all named
using the template tmp_pgbadgerXXXX.bin so they can be easily identified.
=head1 INSTALLATION
Download the tarball from github and unpack the archive as follows:
tar xzf pgbadger-3.x.tar.gz
cd pgbadger-3.x/
perl Makefile.PL
make && sudo make install
This will copy the Perl script pgbadger to /usr/local/bin/pgbadger by default and the man page into /usr/local/share/man/man1/pgbadger.1. Those are the default installation directories for 'site' install.
If you want to install all under /usr/ location, use INSTALLDIRS='perl' as an argument of Makefile.PL. The script will be installed into /usr/bin/pgbadger and the manpage into /usr/share/man/man1/pgbadger.1.
For example, to install everything just like Debian does, proceed as follows:
perl Makefile.PL INSTALLDIRS=vendor
By default INSTALLDIRS is set to site.
=head1 AUTHORS
pgBadger is an original work from Gilles Darold. It is maintained by the good folk at Dalibo and everyone who wants to contribute.
=head1 LICENSE
pgBadger is free software distributed under the PostgreSQL Licence.
Copyright (c) 2012-2013, Dalibo
A modified version of the SQL::Beautify Perl Module is embedded in pgBadger
with copyright (C) 2009 by Jonas Kramer and is published under the terms of
the Artistic License 2.0.
pgbadger-3.3/.gitignore

# Swap files
*.swp
pgbadger-3.3/CONTRIBUTING.md

# How to contribute #
## Before Submitting an issue ##
1. Upgrade to the latest version of pgBadger and see if the problem remains
2. Look at the [closed issues](https://github.com/dalibo/pgbadger/issues?state=closed), we may have already answered a similar problem
3. [Read the doc](http://dalibo.github.com/pgbadger/documentation.html). It is short and useful.
pgbadger-3.3/ChangeLog

2013-05-01 - Version 3.3
This release adds four more useful reports about queries that generate locks and
temporary files, another new report about restartpoints on slaves, and several
bug fixes and cosmetic changes. Support for parallel processing under the
Windows OS has been removed.
- Remove parallel processing under the Windows platform, the use of waitpid
is freezing pgbadger. Thanks to Saurabh Agrawal for the report. I'm
not comfortable with that OS, which is why support has been removed;
if someone knows how to fix that, please submit a patch.
- Fix Error in tempfile() under Windows. Thanks to Saurabh Agrawal for
the report.
- Fix wrong queries storage with lock and temporary file reports. Thanks
to Thomas Reiss for the report.
- Add samples queries to "Most frequent waiting queries" and "Queries
generating the most temporary files" report.
- Add two more reports about locks: 'Most frequent waiting queries (N)",
and "Queries that waited the most". Thanks to Thomas Reiss for the
patch.
- Add two reports about temporary files: "Queries generating the most
temporary files (N)" and "Queries generating the largest temporary
files". Thanks to Thomas Reiss for the patch.
- Cosmetic change to the Min/Max/Avg duration columns.
- Fix report of samples error with csvlog format. Thanks to tpoindessous
for the report.
- Add --disable-autovacuum to the documentation. Thanks to tpoindessous
for the report.
- Fix unmatched ) in regex when using %s in prefix.
- Fix bad average size of temporary file in Overall statistics report.
Thanks to Jehan Guillaume de Rorthais for the report.
- Add restartpoint reporting. Thanks to Guillaume Lelarge for the patch.
- Made some minor change in CSS.
- Replace %% in log line prefix internally by a single % so that it
could be exactly the same than in log_line_prefix. Thanks to Cal
Heldenbrand for the report.
- Fix perl documentation header, thanks to Cyril Bouthors for the patch.
2013-04-07 - Version 3.2
This is mostly a bug fix release; it also adds escaping of HTML code inside
queries and adds Min/Max reports with average duration in all query
reports.
- In multiprocess mode, fix case where pgbadger does not update
the last-parsed file and do not take care of the previous run.
Thanks to Kong Man for the report.
- Fix case where pgbadger does not update the last-parsed file.
Thanks to Kong Man for the report.
- Add CDATA to make validator happy. Thanks to Euler Taveira de
Oliveira for the patch.
- Some code review by Euler Taveira de Oliveira, thanks for the
patch.
- Fix case where stat were multiplied by N when -J was set to N.
Thanks to thegnorf for the report.
- Add a line in documentation about log_statement that disable
log_min_duration_statement when it is set to all.
- Add quick note on how to contribute, thanks to Damien Clochard
for the patch.
- Fix issue with logs read from stdin. Thanks to hubert depesz
lubaczewski for the report.
- Force pgbadger to not try to beautify queries bigger than 10kb,
this will take too much time. This value can be reduce in the
future if hang with long queries still happen. Thanks to John
Rouillard for the report.
- Fix an other issue in replacing bind param when the bind value
is alone on a single line. Thanks to Kjeld Peters for the report.
- Fix parsing of compressed files together with uncompressed files
using the -j option. Uncompressed files are now processed using
split method and compressed ones are parsed per one dedicated process.
- Replace zcat by gunzip -c to fix an issue on MacOsx. Thanks to
Kjeld Peters for the report.
- Escape HTML code inside queries. Thanks to denstark for the report.
- Add Min/Max in addition to Average duration values in queries reports.
Thanks to John Rouillard for the feature request.
- Fix top slowest array size with binary format.
- Fix an other case with bind parameters with value in next line and
the top N slowest queries that was repeated until N even if the real
number of queries was lower. Thanks to Kjeld Peters for the reports.
- Fix non replacement of bind parameters where there is line breaks in
the parameters, aka multiline bind parameters. Thanks to Kjeld Peters
for the report.
- Fix error with seekable export tag with Perl v5.8. Thanks to Jeff Bohmer
for the report.
- Fix parsing of non standard syslog lines beginning with a timestamp like
"2013-02-28T10:35:11-05:00". Thanks to Ryan P. Kelly for the report.
- Fix issue #65 where using -c | --dbclient with csvlog was broken. Thanks
to Jaime Casanova for the report.
- Fix empty report in watchlog mode (-w option).
2013-02-21 - Version 3.1
This is a quick release to fix missing reports of most frequent errors and slowest
normalized queries in previous version published yesterday.
- Fix empty report in watchlog mode (-w option).
- Force immediate die on command line option errors.
- Fix missing report of most frequent events/errors report. Thanks to
Vincent Laborie for the report.
- Fix missing report of slowest normalized queries. Thanks to Vincent
Laborie for the report.
- Fix display of last print of progress bar when quiet mode is enabled.
2013-02-20 - Version 3.0
This new major release adds parallel log processing by using as many cores as
wanted to parse log files; the performance gain is directly related to the
number of cores specified. There are also new reports about autovacuum/autoanalyze
information, and many bugs have been fixed.
- Update documentation about log_duration, log_min_duration_statement
and log_statement.
- Rewrite dirty code around log timestamp comparison to find timestamp
of the specified begin or ending date.
- Remove distinction between logs with duration enabled from variables
log_min_duration_statement and log_duration. Commands line options
--enable-log_duration and --enable-log_min_duration have been removed.
- Update documentation about parallel processing.
- Remove usage of Storable::file_magic to autodetect binary format file,
it is not include in core perl 5.8. Thanks to Marc Cousin for the
report.
- Force multiprocess per file when files are compressed. Thanks to
Julien Rouhaud for the report.
- Add progress bar logger for multiprocess by forking a dedicated
process and using pipe. Also fix some bugs in using binary format
that duplicate query/error samples per process.
- chmod 755 pgbadger
- Fix checkpoint reports when there is no checkpoint warnings.
- Fix non report of hourly connections/checkpoint/autovacuum when no
query is found in the log file. Thanks to Guillaume Lelarge for the
report.
- Add better handling of signals in multiprocess mode.
- Add -J|--job_per_file command line option to force pgbadger to use
one process per file instead of using all of them to parse one file. Useful
to get better performance with lots of small log files.
- Fix parsing of orphan lines with stderr logs and log_line_prefix
without session information into the prefix (%l).
- Update documentation about -j | --jobs option.
- Allow pgbadger to use several cores, aka multiprocessing. Add options
-j | --jobs option to specify the number of core to use.
- Add autovacuum and autoanalyze infos to binary format.
- Fix case in SQL code highlighting where QQCODE temp keyword was not
replaced. Thanks to Julien Ruhaud for the report.
- Fix CSS to draw autovacuum graph and change legend opacity.
- Add pie graph to show repartition of number of autovacuum per table
and number of tuples removed by autovacuum per table.
- Add debug information about selected type of log duration format.
- Add report of tuples/pages removed in report of Vacuums by table.
- Fix major bug on syslog parser where years part of the date was
wrongly extracted from current date with logs generated in 2012.
- Fix issue with Perl 5.16 that do not allow "ss" inside look-behind
assertions. Thanks to Cedric for the report.
- New vacuum and analyze hourly reports and graphs. Thanks to Guillaume
Lelarge for the patch.
UPGRADE: if you are running pgbadger by cron take care if you were using one of
the following option: --enable-log_min_duration and --enable-log_duration, they
have been removed and pgbadger will refuse to start.
2013-01-17 - Version 2.3
This release fixes several major issues especially with csvlog and a memory leak
with log parsing using a start date. There's also several improvement like new
reports of number of queries by database and application. Mouse over reported
queries will show database, user, remote client and application name where they
are executed.
A new binary input/output format has been introduced to allow saving or reading
precomputed statistics. This will allow incremental reports based on periodical
runs of pgbadger. This is a work in progress, fully available with the next
major release.
Several SQL code beautifier improvements from pgFormatter have also been merged.
- Clarify misleading statement about log_duration: log_duration may be
turned on depending on desired information. Only log_statement must
not be on. Thanks to Matt Romaine for the patch.
- Fix --dbname and --dbuser not working with csvlog format. Thanks to
Luke Cyca for the report.
- Fix issue in SQL formatting that prevent left back indentation when
major keywords were found. Thanks to Kevin Brannen for the report.
- Display 3 decimals in time report so that ms can be seen. Thanks to
Adam Schroder for the request.
- Force the parser to not insert a new line after the SET keyword when
the query begin with it. This is to preserve the single line with
queries like SET client_encoding TO "utf8";
- Add better SQL formatting of update queries by adding a new line
after the SET keyword. Thanks to pilat66 for the report.
- Update copyright and documentation.
- Queries without application name are now stored under others
application name.
- Add report of number of queries by application if %a is specified in
the log_line_prefix.
- Add link menu to the request per database and limit the display of
this information when there is more than one database.
- Add report of requests per database.
- Add report of user,remote client and application name to all request
info.
- Fix memory leak with option -b (--begin) and in incremental log
parsing mode.
- Remove duration part from log format auto-detection. Thanks to
Guillaume Lelarge for the report.
- Fix a performance issue on prettifying SQL queries that made pgBadger
several times slower than usual to generate the HTML output. Thanks to
Vincent Laborie for the report.
- Add missing SQL::Beautify paternity.
- Add 'binary' format as input/output format. The binary output format
allows to save log statistics in a non human readable file instead of
an HTML or text file. These binary files might then be used as regular
input files, combined or not, to produce a html or txt report. Thanks
to Jehan Guillaume de Rorthais for the patch.
- Remove port from the session regex pattern to match all lines.
- Fix the progress bar. It was trying to use gunzip to get real file
size for all formats (by default). Unbreak the bz2 format (that does
not report real size) and add support for zip format. Thanks to Euler
Taveira de Oliveira for the patch.
- Fix some typos and grammatical issues. Thanks to Euler Taveira de
Oliveira for the patch.
- Improve SQL code highlighting and keywords detection merging change
from pgFormatter project.
- Add support to hostname or ip address in the client detection. Thanks
to stuntmunkee for the report.
- pgbadger will now only reports execute statement of the extended
protocol (parse/bind/execute). Thanks to pierrestroh for the report.
- Fix numerous typos as well as formatting and grammatical issues.
Thanks to Thom Brown for the patch.
- Add backward compatibility to obsolete --client command line option.
If you were using the short option -c nothing is changed.
- Fix issue with --dbclient and %h in log_line_prefix. Thanks to Julien
Rouhaud for the patch.
- Fix multiline progress bar output.
- Allow usage of a dash into database, user and application names when
prefix is used. Thanks to Vipul for the report.
- Mouse over queries will now show in which database they are executed
in the overviews (Slowest queries, Most frequent queries, etc. ).
Thank to Dirk-Jan Bulsink for the feature request.
- Fix missing keys on %cur_info hash. Thanks to Marc Cousin for the
report.
- Move opening file handle to log file into a dedicated function.
Thanks to Marc Cousin for the patch.
- Replace Ctrl+M by printable \r. Thanks to Marc Cousin for the report.
2012-11-13 - Version 2.2
This release adds some major features like tsung output, speed improvements with
csvlog, a report of shutdown events, and new command line options to generate
reports excluding some user(s), to build reports based on select queries only,
to specify a regex for the queries that should be the only ones included in the
report, and to remove comments from queries. Lots of bug fixes; please upgrade.
- Update PostgreSQL keywords list for 9.2
- Fix number of queries in progress bar with tsung output.
- Remove obsolete syslog-ng and temporary syslog-ll log format added to
fix some syslog autodetection issues. There is now just one syslog
format: syslog, differences between syslog formats are detected and
the log parser is adaptive.
- Add comment about the check_incremental_position() method
- Fix reports with empty graphs when log files were not in chronological
order.
- Add report of current total of queries and events parsed in progress
bar. Thanks to Jehan-Guillaume de Rorthais for the patch.
- Force pgBadger to use and require the XS version of Text::CSV instead
of the pure Perl implementation. It is a good bit faster. Thanks to
David Fetter for the patch. Note that using csvlog is still a bit
slower than the syslog or stderr log formats.
- Fix several issue with tsung output.
- Add report of shut down events
- Add debug information on command line used to pipe compressed log
file when -v is provide.
- Add -U | --exclude-user command line option to generate a report
excluding a user. Thanks to Birta Levente for the feature request.
- Allow some options to be specified multiple times or written as a
comma-separated list of values; these options are: --dbname,
--dbuser, --dbclient, --dbappname, --exclude_user.
- Add -S | --select-only option to build report only on select queries.
- Add first support to tsung output, see usage. Thanks to Guillaume
Lelarge for the feature request.
- Add --include-query and --include-file to specify regex of the queries
that must only be included in the report. Thanks to Marc Cousin for
the feature request.
- Fix auto detection of log_duration and log_min_duration_statement
format.
- Fix parser issue with Windows logs without timezone information.
Thanks to Nicolas Thauvin for the report.
- Fix bug in %r = remote host and port log line prefix detection.
Thanks to Hubert Depesz Lubaczewski for the report.
- Add -C | --nocomment option to remove comment like /* ... */ from
queries. Thanks to Hubert Depesz Lubaczewski for the feature request.
- Fix escaping of log_line_prefix. Thanks to Hubert Depesz Lubaczewski
for the patch.
- Fix wrong detection of update queries when a query has a object names
containing update and set. Thanks to Vincent Laborie for the report.
2012-10-10 - Version 2.1
This release adds a major feature by allowing any custom log_line_prefix to be
used by pgBadger. With stderr output you at least need to log the timestamp (%t),
the pid (%p) and the session/line number (%l). Support for log_duration instead
of log_min_duration_statement allows reports simply based on duration and
count, without query details. Lots of bug fixes; please upgrade
asap.
- Add new --enable-log_min_duration option to force pgbadger to use lines
generated by the log_min_duration_statement even if the log_duration
format is autodetected. Useful if you use both but do not log all queries.
Thanks to Vincent Laborie for the feature request.
- Add syslog-ng format to better handle syslog traces with notation like:
[ID * local2.info]. It is autodetected but can be forced in the -f option
with value set to: syslog-ng.
- Add --enable-log_duration command line option to force pgbadger to only
use the log_duration trace even if log_min_duration_statement traces are
autodetected.
- Fix display of empty hourly graph when no data were found.
- Remove query type report when log_duration is enabled.
- Fix a major bug in query with bind parameter. Thanks to Marc Cousin for
the report.
- Fix detection of compressed log files and allow automatic detection
and uncompress of .gz, .bz2 and .zip files.
- Add gunzip -l command to find the real size of a gzip compressed file.
- Fix log_duration only reports to not take care about query detail but
just count and duration.
- Fix issue with compressed csvlog. Thanks to Philip Freeman for the
report.
- Allow usage of log_duration instead of log_min_duration_statement to
just collect statistics about the number of queries and their time.
Thanks to Vincent Laborie for the feature request.
- Fix issue on syslog format and autodetect with additional info like:
[ID * local2.info]. Thanks to kapsalar for the report.
- Removed unrecognized log line generated by deadlock_timeout.
- Add missing information about unsupported csv log input from stdin.
It must be read from a file. Thank to Philip Freeman for the report.
- Fix issue #28: Illegal division by zero with log file without query
and txt output. Thanks to rlowe for the report.
- Update documentation about the -N | --appname option.
- Rename --name option into --appname. Thanks to Guillaume Lellarge for
the patch.
- Fix min/max value on the x-axis that always represented 2 days by
default. Thanks to Casey Allen Shobe for the report.
- Fix major bug when running pgbadger with the -e option. Thanks to
Casey Allen Shobe for the report and the great help
- Change project url to http://dalibo.github.com/pgbadger/. Thanks to
Damien Clochard for this new hosting.
- Fix lot of issues in CSV parser and force locale to be C. Thanks to
Casey Allen Shobe for the reports.
- Improve speed with custom log_line_prefix.
- Merge pull request #26 from elementalvoid/helpdoc-fix
- Fixed help text for --exclude-file. Old help text indicated that the
option name was --exclude_file which was incorrect.
- Remove the obsolete --regex-user and --regex-db options that was used
to specify a search pattern in the log_line_prefix to find the user
and db name. This is replaced by the --prefix option.
- Replace Time column report header by Hour.
- Fix another issue in log_line_prefix parser with stderr format
- Add a more complex example using log_line_prefix
- Fix log_line_prefix issue when using timestamps with milliseconds.
- Add support to use any custom log_line_prefix with new option -p or
--prefix. See README for an example.
- Fix false autodetection of CSV format when log_statement is enable or
in possible other cases. This was resulting in error: "FATAL: cannot
use CSV". Thanks to Thomas Reiss for the report.
- Fix display of empty graph of connections per seconds
- Allow character : in log line prefix, it will no more break the log
parsing. Thanks to John Rouillard for the report.
- Add report of configuration parameter changes into the errors report
and change errors report by events report to handle important messages
that are not errors.
- Allow pgbadger to recognize " autovacuum launcher" messages.
2012-08-21 - version 2.0
This major version adds some changes that are not backward compatible with
previous versions. Options -p and -g are no longer used, as the progress bar
and graph generation are now enabled by default.
The obsolete -l option used to specify the log file to parse has been reused to
specify an incremental file. Besides these changes and some bug fixes there are
also new features:
* Using an incremental file with the -l option allows parsing a single log
file multiple times and "seeking" to the last line parsed during the previous
run. Useful if you have a log rotation that is not in sync with your pgbadger
run. For example you can run something like this:
pgbadger `find /var/log/postgresql/ -name "postgresql*" -mtime -7 -type f` \
-o report_`date +%F`.html -l /var/run/pgbadger/last_run.log
* All queries displayed in the HTML report are now clickable to display or
hide a nicely formatted SQL query. This is called the SQL format beautifier.
* The CSV log parser has been entirely rewritten to handle CSV with multiline
values. Everyone should upgrade.
- Change license from BSD like to PostgreSQL license. Request from
Robert Treat.
- Fix wrong pointer on Connections per host menu. Reported by Jean-Paul
Argudo.
- Small fix for sql formatting adding scrollbars. Patch by Julien
Rouhaud.
- Add SQL format beautifier on SQL queries. When you will click on a
query it will be beautified. Patch by Gilles Darold
- The progress bar is now enabled by default, the -p option has been
removed. Use -q | --quiet to disable it. Patch by Gilles Darold.
- Graphs are now generated by default for HTML output, option -g as
been remove and option -G added to allow disabling graph generation.
Request from Julien Rouhaud, patch by Gilles Darold.
- Remove option -g and -p to the documentation. Patch by Gilles Darold.
- Fix case sensitivity in command line options. Patch by Julien Rouhaud.
- Add -T|--title option to change report title. Patch by Yury Bushmelev.
- Add new option --exclude-file to exclude specific commands with regex
stated in a file. This is a rewrite by Gilles Darold of the neoeahit
(Vipul) patch.
- CSV log parser have been entirely rewritten to handle csv with multi
line, it also adds approximative duration for csvlog. Reported by
Ludhimila Kendrick, patch by Gilles Darold.
- Alphabetical reordering of options list in method usage() and
documentation. Patch by Gilles Darold.
- Remove obsolete -l | --logfile command line option, the -l option
will be reused to specify an incremental file. Patch by Gilles Darold.
- Add -l | --last-parsed options to allow incremental run of pgbadger.
Patch by Gilles Darold.
- Replace call to timelocal_nocheck by timegm_nocheck, to convert date
time into second from the epoch. This should fix timezone issue.
Patch by Gilles Darold.
- Change regex on log parser to allow missing ending space in
log_line_prefix. This seems a common mistake. Patch by Gilles Darold.
- print warning when an empty log file is found. Patch by Gilles Darold.
- Add perltidy rc file to format pgbadger Perl code. Patch from depesz.
2012-07-15 - version 1.2
This version adds some reports and fixes a major issue in the log parser.
Everyone should upgrade.
- Rewrite this changelog to be human readable.
- Add -v | --verbose to enable debug mode. It is now disable by default
- Add hourly report of checkpoint warnings when checkpoints are occurring
too frequently; it will display the hourly count and the average
occurrence time.
- Add new report that sums the messages by log types. The report shows
the number of messages of each log type, and a percentage. It also
displays a pie graph. Patch by Guillaume Lelarge.
- Add missing pie graph on locks by type report.
- Format pie mouse track to display values only.
- Fix graph download button id on new connection graph.
- Add trackFormatter to flotr2 line graphs to show current x/y values.
- Fix issue on per minute minimum value.
- Add a note about Windows Os and zcat as well as a more general note
about using compressed log file in other format than gzip.
- Complete rewrite of the log parser to handle unordered log lines.
Data are now stored by pid before and added to the global statistics
at end. Error report now include full details, statements, contexts
and hints when available. Deadlock are also fully reported with the
concerned queries.
- Fix mishandling of multi-line queries on syslog.
- Add -a|--average option to configure the per-minute average interval
for queries and connections. If you want the average to be calculated
each minute instead of the default 5, use --average 1, or for the
default --average 5. If you want an average per hour set it to 60.
- Add hourly statistics of connections and sessions as well as a chart
about the number of connection per second (5 minutes average).
- Allow OTHERS type of queries lower than 2% to be include in the sum of
types < 2%.
- Add autodetection of the syslog ident name if it is different from the
default "postgres" and there is just one ident name in the log.
- Remove syslog replacement of tabulations by #011 that was still visible
when there were multiple tabulations.
- Fix autodetection of log format syslog with single-digit day number
in date.
- Add ChangeLog to MANIFEST and change URI in html footer.
- Check pgBadger compatibility with Windows OSes. Runs perfectly.
2012-07-04 - version 1.1
This release fixes a lot of issues and adds several major features.
New feature:
- Add possibility to get log from stdin
- Change syslog parsing regex to allow a log timestamp in log_line_prefix,
very often forgotten when the log destination is changed from stderr to
syslog.
- Add documentation for the -z | --zcat command line option.
- Allow `zcat` location to be specified via `--zcat` - David E. Wheeler
- Add --disable-session, --disable-connection and --disable-checkpoint
command line options to remove their respective reports from the
output
- Add --disable-query command line option to remove queries statistics
from the output
- Add --disable-hourly command line option to remove hourly statistics
from the output
- Add --disable-error command line option to remove error report from
the output
- Add --exclude-query option to exclude types of queries by specifying
a regex
- Set thousands separator and decimal separator to be locale dependent
- Add -w option to only report errors
- Add Makefile.PL and full POD documentation to the project
- Allow multiple log files from command line
- Add simple csvlog support - Alex Hunsaker
- Hourly reports for temporary files and checkpoints have moved to a
separate table.
- Add hourly connections and sessions statistics.
- Add a chart about the number of connections per second.
Bug fix:
- Add information about log format requirement (lc_message = 'C').
Reported by Alain Benard.
- Fix for begin/end dates with single digit day using syslog. Patch by
Joseph Marlin.
- Fix handle of syslog dates with single-digit day number. Patch by
Denis Orlikhin.
- Fix many English syntax errors in error messages and documentation.
Patch by Joseph Marlin.
- Fix unterminated TH HTML tag in the checkpoint hourly table. Reported
by Joseph Marlin.
- "Log file" section will now only report the first and last log files parsed
- Fix empty output in hourly temporary file stats.
- Fix wrapping of queries that go outside the table and make the window
scroll horizontally. Asked by Isaac Reuben.
- Fix code where != was replaced by $$CLASSSY0A$$!=$$CLASSSY0B$$ in the
output. Reported by Isaac Reuben
- Fix and review text report output.
- Fix an issue in SQL code highlight replacement.
- Complete review of the HTML output.
- Add .gitignore for swap files. Patch by Vincent Picavet
- Fix wrong variable for user and database filter. Patch by Vincent
Picavet.
- Change default regexp for user and db to be able to detect both. Patch
by Vincent Picavet.
- Fix false cur_date when using syslog and allow -b and -e options to
work. Patch by Vincent Picavet.
- Fix some cases where logs were not detected as PostgreSQL log lines.
- Added explanation for --begin and --end datetime setting. Patch by
ragged.
- Added -v / --version. Patch by ragged.
- Fix usage information and presentation in README file.
2012-05-04 - version 1.0
First public release of pgBadger.
New feature:
- Add graph of checkpoint WAL files usage (added, removed, recycled).
- Add --image-format to allow the change of the default png image
format to jpeg.
- Allow download of all pie graphics as images.
- Add --pie-limit to sum all data lower than this percentage limit to
avoid label overlap.
- Allow download of graphics as PNG images.
- Replace GD::Graph by the Flotr2 javascript library to draw graphics.
Patch by Guillaume Lelarge
- Add pie graphs for session, database, user and host. Add a --quiet
option to remove debug output and --progress to show a progress bar
during log parsing
- Add pie graph for Queries by type.
- Add graph for checkpoint write buffers per hour
- Allow log parsing without any log_line_prefix and extend it to be
defined by the user. Custom log_line_prefix can be parsed using
user-defined regexes with command line options --regex-db and --regex-user.
For example the default regex of pgbadger to parse user and db name
from log_line_prefix can be written like this:
pgbadger -l mylogfile.log --regex-user="user=([^,]*)," \
--regex-db="db=([^,]*)"
- Separate the log_line_prefix from the log level part in the parser to
extend log_line_prefix parsing
- If there is just one argument, assume it is the logfile and use
default value for all other parameters
- Add autodetection of log format (syslog or stderr) if none is given
with option -f
- Add --outfile option to dump output to a file instead of stdout.
Default filename is out.html or out.txt following the output format.
To dump to stdout set filename to -
- Add --version command line option to show current pgbadger version.
Bug fix:
- Rearrange x and y axis
- Fix legend opacity on graphics
- Rearrange Overall stats view
- Add more "normalization" on errors messages
- Fix samples error with normalyzed error instead of real error message
- Fix an other average size of temporary file decimal limit
- Force quiet mode when --progress is used
- Fix per sessions graphs
- Fix sort order of days/hours into hours array
- Fix sort order of days into graphics
- Remove display of locks, sessions and connections statistics when none
are available
- Fix display of empty checkpoint columns when no checkpoint was found
in the log file
pgbadger-3.3/META.yml 0000644 0001750 0001750 00000000516 12140236270 013676 0 ustar darold darold # http://module-build.sourceforge.net/META-spec.html
#XXXXXXX This is a prototype!!! It will change in the future!!! XXXXX#
name: pgBadger
version: 1.1
version_from: pgbadger
installdirs: site
recommends:
Text::CSV_XS: 0
distribution_type: script
generated_by: ExtUtils::MakeMaker version 6.17
pgbadger-3.3/pgbadger 0000755 0001750 0001750 00001240441 12140236270 014132 0 ustar darold darold #!/usr/bin/perl
#------------------------------------------------------------------------------
#
# pgBadger - Advanced PostgreSQL log analyzer
#
# This program is open source, licensed under the PostgreSQL Licence.
# For license terms, see the LICENSE file.
#------------------------------------------------------------------------------
#
# Settings in postgresql.conf
#
# You should enable SQL query logging with log_min_duration_statement >= 0
# With stderr output
# Log line prefix should be: log_line_prefix = '%t [%p]: [%l-1] '
# Log line prefix should be: log_line_prefix = '%t [%p]: [%l-1] user=%u,db=%d '
# Log line prefix should be: log_line_prefix = '%t [%p]: [%l-1] db=%d,user=%u '
# With syslog output
# Log line prefix should be: log_line_prefix = 'db=%d,user=%u '
#
# Additional information that could be collected and reported
# log_checkpoints = on
# log_connections = on
# log_disconnections = on
# log_lock_waits = on
# log_temp_files = 0
# log_autovacuum_min_duration = 0
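#
# As an illustration only (hypothetical timestamp, pid and database), with the
# stderr prefix '%t [%p]: [%l-1] user=%u,db=%d ' a resulting log line looks like:
#   2012-08-21 10:56:11 CEST [12345]: [1-1] user=postgres,db=mydb LOG:  duration: 125.002 ms  statement: SELECT 1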
#------------------------------------------------------------------------------
use vars qw($VERSION);
use strict qw(vars subs);
use Getopt::Long qw(:config no_ignore_case bundling);
use IO::File;
use Benchmark;
use File::Basename;
use Storable qw(store_fd fd_retrieve);
use Time::Local 'timegm_nocheck';
use POSIX qw(locale_h sys_wait_h _exit);
setlocale(LC_NUMERIC, '');
setlocale(LC_ALL, 'C');
use File::Spec qw/ tmpdir /;
use File::Temp qw/ tempfile /;
use IO::Handle;
use IO::Pipe;
use Time::HiRes qw/usleep/;
$VERSION = '3.3';
$SIG{'CHLD'} = 'DEFAULT';
my $TMP_DIR = File::Spec->tmpdir() || '/tmp';
my %RUNNING_PIDS = ();
my @tempfiles = ();
my $parent_pid = $$;
my $interrupt = 0;
my $tmp_last_parsed = '';
####
# Method used to fork as many children as wanted
##
sub spawn
{
my $coderef = shift;
unless (@_ == 0 && $coderef && ref($coderef) eq 'CODE') {
print "usage: spawn CODEREF";
exit 0;
}
my $pid;
if (!defined($pid = fork)) {
print STDERR "Error: cannot fork: $!\n";
return;
} elsif ($pid) {
$RUNNING_PIDS{$pid} = $pid;
return; # the parent
}
# the child -- go spawn
$< = $>;
$( = $); # suid progs only
exit &$coderef();
}
# Inform the parent that it should stop iterating over the other files
sub stop_parsing
{
$interrupt = 1;
}
# With multiprocess we need to wait for all children
sub wait_child
{
my $sig = shift;
print STDERR "Received terminating signal ($sig).\n";
if ($^O !~ /MSWin32|dos/i) {
1 while wait != -1;
$SIG{INT} = \&wait_child;
$SIG{TERM} = \&wait_child;
foreach my $f (@tempfiles) {
unlink("$f->[1]") if (-e "$f->[1]");
}
unlink("$tmp_last_parsed") if ($tmp_last_parsed);
}
_exit(0);
}
$SIG{INT} = \&wait_child;
$SIG{TERM} = \&wait_child;
$SIG{USR2} = \&stop_parsing;
$| = 1;
# Command line options
my $zcat_cmd = 'gunzip -c';
my $zcat = $zcat_cmd;
my $bzcat = 'bunzip2 -c';
my $ucat = 'unzip -p';
my $gzip_uncompress_size = "gunzip -l %f | grep -E '^\\s*[0-9]+' | awk '{print \$2}'";
my $zip_uncompress_size = "unzip -l %f | awk '{if (NR==4) print \$1}'";
my $format = '';
my $outfile = '';
my $outdir = '';
my $help = '';
my $ver = '';
my @dbname = ();
my @dbuser = ();
my @dbclient = ();
my @dbclient2 = ();
my @dbappname = ();
my @exclude_user = ();
my $ident = '';
my $top = 0;
my $sample = 0;
my $extension = '';
my $maxlength = 0;
my $graph = 1;
my $nograph = 0;
my $debug = 0;
my $nohighlight = 0;
my $noprettify = 0;
my $from = '';
my $to = '';
my $quiet = 0;
my $progress = 1;
my $error_only = 0;
my @exclude_query = ();
my $exclude_file = '';
my @include_query = ();
my $include_file = '';
my $disable_error = 0;
my $disable_hourly = 0;
my $disable_type = 0;
my $disable_query = 0;
my $disable_session = 0;
my $disable_connection = 0;
my $disable_lock = 0;
my $disable_temporary = 0;
my $disable_checkpoint = 0;
my $disable_autovacuum = 0;
my $avg_minutes = 5;
my $last_parsed = '';
my $report_title = 'pgBadger: PostgreSQL log analyzer';
my $log_line_prefix = '';
my $compiled_prefix = '';
my $project_url = 'http://dalibo.github.com/pgbadger/';
my $t_min = 0;
my $t_max = 0;
my $t_min_hour = 0;
my $t_max_hour = 0;
my $remove_comment = 0;
my $select_only = 0;
my $tsung_queries = 0;
my $queue_size = 0;
my $job_per_file = 0;
my $NUMPROGRESS = 10000;
my @DIMENSIONS = (800, 300);
my $RESRC_URL = '';
my $img_format = 'png';
my @log_files = ();
my %prefix_vars = ();
my $sql_prettified;
# Do not display data in pie where percentage is lower than this value
# to avoid label overlapping.
my $pie_percentage_limit = 2;
# Get the decimal separator
my $n = 5 / 2;
my $num_sep = ',';
$num_sep = ' ' if ($n =~ /,/);
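# Note (illustrative): in locales where the decimal separator is a comma,
# 5 / 2 stringifies as "2,5", so a space is used as the thousands separator
# to avoid clashing with the decimal comma; otherwise a comma is used.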
# get the command line parameters
my $result = GetOptions(
"a|average=i" => \$avg_minutes,
"b|begin=s" => \$from,
"c|dbclient=s" => \@dbclient,
"C|nocomment!" => \$remove_comment,
"d|dbname=s" => \@dbname,
"e|end=s" => \$to,
"f|format=s" => \$format,
"G|nograph!" => \$nograph,
"h|help!" => \$help,
"i|ident=s" => \$ident,
"j|jobs=i" => \$queue_size,
"J|job_per_file=i" => \$job_per_file,
"l|last-parsed=s" => \$last_parsed,
"m|maxlength=i" => \$maxlength,
"N|appname=s" => \@dbappname,
"n|nohighlight!" => \$nohighlight,
"o|outfile=s" => \$outfile,
"p|prefix=s" => \$log_line_prefix,
"P|no-prettify!" => \$noprettify,
"q|quiet!" => \$quiet,
"s|sample=i" => \$sample,
"S|select-only!" => \$select_only,
"t|top=i" => \$top,
"T|title=s" => \$report_title,
"u|dbuser=s" => \@dbuser,
"U|exclude-user=s" => \@exclude_user,
"v|verbose!" => \$debug,
"V|version!" => \$ver,
"w|watch-mode!" => \$error_only,
"x|extension=s" => \$extension,
"z|zcat=s" => \$zcat,
"pie-limit=i" => \$pie_percentage_limit,
"image-format=s" => \$img_format,
"exclude-query=s" => \@exclude_query,
"exclude-file=s" => \$exclude_file,
"include-query=s" => \@include_query,
"include-file=s" => \$include_file,
"disable-error!" => \$disable_error,
"disable-hourly!" => \$disable_hourly,
"disable-type!" => \$disable_type,
"disable-query!" => \$disable_query,
"disable-session!" => \$disable_session,
"disable-connection!" => \$disable_connection,
"disable-lock!" => \$disable_lock,
"disable-temporary!" => \$disable_temporary,
"disable-checkpoint!" => \$disable_checkpoint,
"disable-autovacuum!" => \$disable_autovacuum,
"client=s" => \@dbclient2, # Backward compatibility
);
die "FATAL: use pgbadger --help\n" if (not $result);
push(@dbclient, @dbclient2); # Backward compatibility
if ($ver) {
print "pgBadger version $VERSION\n";
exit 0;
}
&usage() if ($help);
# Rewrite some command line arguments as lists
&compute_arg_list();
# Log files to be parsed are passed as command line arguments
if ($#ARGV >= 0) {
foreach my $file (@ARGV) {
if ($file ne '-') {
die "FATAL: logfile $file must exist!\n" if not -f $file;
if (-z $file) {
print "WARNING: file $file is empty\n";
next;
}
}
push(@log_files, $file);
}
}
# Logfile is a mandatory parameter
if ($#log_files < 0) {
print STDERR "FATAL: you must give a log file as command line parameter.\n\n";
&usage();
}
# Quiet mode is forced with progress bar
$progress = 0 if ($quiet);
# Set the default number of minutes for the queries and connections average
$avg_minutes ||= 5;
$avg_minutes = 60 if ($avg_minutes > 60);
$avg_minutes = 1 if ($avg_minutes < 1);
# Set syslog prefix regex
my $other_syslog_line =
qr/^(...)\s+(\d+)\s(\d+):(\d+):(\d+)(?:\s[^\s]+)?\s([^\s]+)\s([^\s\[]+)\[(\d+)\]:(?:\s\[[^\]]+\])?\s\[(\d+)\-\d+\]\s*(.*)/;
my $orphan_syslog_line = qr/^(...)\s+(\d+)\s(\d+):(\d+):(\d+)(?:\s[^\s]+)?\s([^\s]+)\s([^\s\[]+)\[(\d+)\]:/;
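# These regexes match standard syslog lines such as (illustrative example only):
#   Aug 21 10:56:11 myhost postgres[12345]: [2-1] LOG:  duration: 125.002 ms  statement: SELECT 1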
my $orphan_stderr_line = '';
# Set default format
$format ||= &autodetect_format($log_files[0]);
if ($format eq 'syslog2') {
$other_syslog_line =
qr/^(\d+)-(\d+)-(\d+)T(\d+):(\d+):(\d+)(?:.[^\s]+)?\s([^\s]+)\s([^\s\[]+)\[(\d+)\]:(?:\s\[[^\]]+\])?\s\[(\d+)\-\d+\]\s*(.*)/;
$orphan_syslog_line = qr/^(\d+)-(\d+)-(\d+)T(\d+):(\d+):(\d+)(?:.[^\s]+)?\s([^\s]+)\s([^\s\[]+)\[(\d+)\]:/;
}
# Set default top query
$top ||= 20;
# Set the default number of samples
$sample ||= 3;
# Set the default extension and output format
if (!$extension) {
if ($outfile =~ /\.bin/i) {
$extension = 'binary';
} elsif ($outfile =~ /\.tsung/i) {
$extension = 'tsung';
} elsif ($outfile =~ /\.htm[l]*/i) {
$extension = 'html';
} elsif ($outfile) {
$extension = 'txt';
} else {
$extension = 'html';
}
}
# Set default filename of the output file
$outfile ||= 'out.' . $extension;
&logmsg('DEBUG', "Output '$extension' reports will be written to $outfile");
# Set default syslog ident name
$ident ||= 'postgres';
# Set default pie percentage limit or fix value
$pie_percentage_limit = 0 if ($pie_percentage_limit < 0);
$pie_percentage_limit = 2 if ($pie_percentage_limit eq '');
$pie_percentage_limit = 100 if ($pie_percentage_limit > 100);
# Set default download image format
$img_format = lc($img_format);
$img_format = 'jpeg' if ($img_format eq 'jpg');
$img_format = 'png' if ($img_format ne 'jpeg');
# Extract the output directory from outfile so that graphs will
# be created in the same directory
my @infs = fileparse($outfile);
$outdir = $infs[1] . '/';
# Remove graph support if output is not html
$graph = 0 unless ($extension eq 'html' or $extension eq 'binary' );
$graph = 0 if ($nograph);
# Set some default values
my $end_top = $top - 1;
$queue_size ||= 1;
$job_per_file ||= 1;
if ($^O =~ /MSWin32|dos/i) {
if ( ($queue_size > 1) || ($job_per_file > 1) ) {
print STDERR "WARNING: parallel processing is not supported on this platform.\n";
$queue_size = 1;
$job_per_file = 1;
}
}
if ($extension eq 'tsung') {
# Open filehandle
my $fh = new IO::File ">$outfile";
if (not defined $fh) {
die "FATAL: can't write to $outfile, $!\n";
}
print $fh "\n";
$fh->close();
} else {
# Test file creation before going to parse log
my $tmpfh = new IO::File ">$outfile";
if (not defined $tmpfh) {
die "FATAL: can't write to $outfile, $!\n";
}
$tmpfh->close();
unlink($outfile) if (-e $outfile);
}
# -w and --disable-error can't go together
if ($error_only && $disable_error) {
die "FATAL: please choose between no event report and reporting events only.\n";
}
# Set default search pattern for database and user name in log_line_prefix
my $regex_prefix_dbname = qr/db=([^,]*)/;
my $regex_prefix_dbuser = qr/user=([^,]*)/;
# Loading excluded queries from file if any
if ($exclude_file) {
open(IN, "$exclude_file") or die "FATAL: can't read file $exclude_file: $!\n";
my @exclq = <IN>;
close(IN);
chomp(@exclq);
map {s/\r//;} @exclq;
foreach my $r (@exclq) {
&check_regex($r, '--exclude-file');
}
push(@exclude_query, @exclq);
}
# Testing regex syntax
if ($#exclude_query >= 0) {
foreach my $r (@exclude_query) {
&check_regex($r, '--exclude-query');
}
}
# Loading included queries from file if any
if ($include_file) {
open(IN, "$include_file") or die "FATAL: can't read file $include_file: $!\n";
my @exclq = <IN>;
close(IN);
chomp(@exclq);
map {s/\r//;} @exclq;
foreach my $r (@exclq) {
&check_regex($r, '--include-file');
}
push(@include_query, @exclq);
}
# Testing regex syntax
if ($#include_query >= 0) {
foreach my $r (@include_query) {
&check_regex($r, '--include-query');
}
}
my @action_regex = (
qr/^\s*(delete) from/is,
qr/^\s*(insert) into/is,
qr/^\s*(update) .*\bset\b/is,
qr/^\s*(select) /is
);
# Compile custom log line prefix
my @prefix_params = ();
if ($log_line_prefix) {
# Build parameters name that will be extracted from the prefix regexp
@prefix_params = &build_log_line_prefix_regex();
&check_regex($log_line_prefix, '--prefix');
if ($format eq 'syslog') {
$log_line_prefix =
'^(...)\s+(\d+)\s(\d+):(\d+):(\d+)(?:\s[^\s]+)?\s([^\s]+)\s([^\s\[]+)\[(\d+)\]:(?:\s\[[^\]]+\])?\s\[(\d+)\-\d+\]\s*'
. $log_line_prefix
. '\s*(LOG|WARNING|ERROR|FATAL|PANIC|DETAIL|STATEMENT|HINT|CONTEXT):\s+(.*)';
$compiled_prefix = qr/$log_line_prefix/;
unshift(@prefix_params, 't_month', 't_day', 't_hour', 't_min', 't_sec', 't_host', 't_ident', 't_pid', 't_session_line');
push(@prefix_params, 't_loglevel', 't_query');
} elsif ($format eq 'syslog2') {
$format = 'syslog';
$log_line_prefix =
'^(\d+)-(\d+)-(\d+)T\d+:\d+:\d+(?:.[^\s]+)?\s([^\s]+)\s([^\s\[]+)\[(\d+)\]:(?:\s\[[^\]]+\])?\s\[(\d+)\-\d+\]\s*'
. $log_line_prefix
. '\s*(LOG|WARNING|ERROR|FATAL|PANIC|DETAIL|STATEMENT|HINT|CONTEXT):\s+(.*)';
$compiled_prefix = qr/$log_line_prefix/;
unshift(@prefix_params, 't_year', 't_month', 't_day', 't_hour', 't_min', 't_sec', 't_host', 't_ident', 't_pid', 't_session_line');
push(@prefix_params, 't_loglevel', 't_query');
} elsif ($format eq 'stderr') {
$orphan_stderr_line = qr/$log_line_prefix/;
$log_line_prefix = '^' . $log_line_prefix . '\s*(LOG|WARNING|ERROR|FATAL|PANIC|DETAIL|STATEMENT|HINT|CONTEXT):\s+(.*)';
$compiled_prefix = qr/$log_line_prefix/;
push(@prefix_params, 't_loglevel', 't_query');
}
} elsif ($format eq 'syslog') {
$compiled_prefix =
qr/^(...)\s+(\d+)\s(\d+):(\d+):(\d+)(?:\s[^\s]+)?\s([^\s]+)\s([^\s\[]+)\[(\d+)\]:(?:\s\[[^\]]+\])?\s\[(\d+)\-\d+\]\s*(.*?)\s*(LOG|WARNING|ERROR|FATAL|PANIC|DETAIL|STATEMENT|HINT|CONTEXT):\s+(.*)/;
push(@prefix_params, 't_month', 't_day', 't_hour', 't_min', 't_sec', 't_host', 't_ident', 't_pid', 't_session_line',
't_logprefix', 't_loglevel', 't_query');
} elsif ($format eq 'syslog2') {
$format = 'syslog';
$compiled_prefix =
qr/^(\d+)-(\d+)-(\d+)T(\d+):(\d+):(\d+)(?:.[^\s]+)?\s([^\s]+)\s([^\s\[]+)\[(\d+)\]:(?:\s\[[^\]]+\])?\s\[(\d+)\-\d+\]\s*(.*?)\s*(LOG|WARNING|ERROR|FATAL|PANIC|DETAIL|STATEMENT|HINT|CONTEXT):\s+(.*)/;
push(@prefix_params, 't_year', 't_month', 't_day', 't_hour', 't_min', 't_sec', 't_host', 't_ident', 't_pid', 't_session_line',
't_logprefix', 't_loglevel', 't_query');
} elsif ($format eq 'stderr') {
$compiled_prefix =
qr/^(\d+-\d+-\d+\s\d+:\d+:\d+)[\.\d]*(?: [A-Z\d]{3,6})?\s\[(\d+)\]:\s\[(\d+)\-\d+\]\s*(.*?)\s*(LOG|WARNING|ERROR|FATAL|PANIC|DETAIL|STATEMENT|HINT|CONTEXT):\s+(.*)/;
push(@prefix_params, 't_timestamp', 't_pid', 't_session_line', 't_logprefix', 't_loglevel', 't_query');
$orphan_stderr_line = qr/^(\d+-\d+-\d+\s\d+:\d+:\d+)[\.\d]*(?: [A-Z\d]{3,6})?\s\[(\d+)\]:\s\[(\d+)\-\d+\]\s*(.*?)\s*/;
}
sub check_regex
{
my ($pattern, $varname) = @_;
eval {m/$pattern/i;};
if ($@) {
die "FATAL: '$varname' invalid regex '$pattern', $!\n";
}
}
# Check start/end date time
if ($from) {
if ($from !~ /^(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})$/) {
die "FATAL: bad format for begin datetime, should be yyyy-mm-dd hh:mm:ss\n";
}
}
if ($to) {
if ($to !~ /^(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})$/) {
die "FATAL: bad format for ending datetime, should be yyyy-mm-dd hh:mm:ss\n";
}
}
# Stores the last parsed line from log file to allow incremental parsing
my $LAST_LINE = '';
# Set the level of the data aggregator, can be minute, hour or day following
# the size of the log file.
my $LEVEL = 'hour';
# Month names
my %month_abbr = (
'Jan' => '01', 'Feb' => '02', 'Mar' => '03', 'Apr' => '04', 'May' => '05', 'Jun' => '06',
'Jul' => '07', 'Aug' => '08', 'Sep' => '09', 'Oct' => '10', 'Nov' => '11', 'Dec' => '12'
);
my %abbr_month = (
'01' => 'Jan', '02' => 'Feb', '03' => 'Mar', '04' => 'Apr', '05' => 'May', '06' => 'Jun',
'07' => 'Jul', '08' => 'Aug', '09' => 'Sep', '10' => 'Oct', '11' => 'Nov', '12' => 'Dec'
);
# Keywords variable
my @pg_keywords = qw(
ALL ANALYSE ANALYZE AND ANY ARRAY AS ASC ASYMMETRIC AUTHORIZATION BINARY BOTH CASE
CAST CHECK COLLATE COLLATION COLUMN CONCURRENTLY CONSTRAINT CREATE CROSS
CURRENT_DATE CURRENT_ROLE CURRENT_TIME CURRENT_TIMESTAMP CURRENT_USER
DEFAULT DEFERRABLE DESC DISTINCT DO ELSE END EXCEPT FALSE FETCH FOR FOREIGN FREEZE FROM
FULL GRANT GROUP HAVING ILIKE IN INITIALLY INNER INTERSECT INTO IS ISNULL JOIN LEADING
LEFT LIKE LIMIT LOCALTIME LOCALTIMESTAMP NATURAL NOT NOTNULL NULL ON ONLY OPEN OR
ORDER OUTER OVER OVERLAPS PLACING PRIMARY REFERENCES RETURNING RIGHT SELECT SESSION_USER
SIMILAR SOME SYMMETRIC TABLE THEN TO TRAILING TRUE UNION UNIQUE USER USING VARIADIC
VERBOSE WHEN WHERE WINDOW WITH
);
# Highlight variables
my @KEYWORDS1 = qw(
ALTER ADD AUTO_INCREMENT BETWEEN BY BOOLEAN BEGIN CHANGE COLUMNS COMMIT COALESCE CLUSTER
COPY DATABASES DATABASE DATA DELAYED DESCRIBE DELETE DROP ENCLOSED ESCAPED EXISTS EXPLAIN
FIELDS FIELD FLUSH FUNCTION GREATEST IGNORE INDEX INFILE INSERT IDENTIFIED IF INHERIT
KEYS KILL KEY LINES LOAD LOCAL LOCK LOW_PRIORITY LANGUAGE LEAST LOGIN MODIFY
NULLIF NOSUPERUSER NOCREATEDB NOCREATEROLE OPTIMIZE OPTION OPTIONALLY OUTFILE OWNER PROCEDURE
PROCEDURAL READ REGEXP RENAME RETURN REVOKE RLIKE ROLE ROLLBACK SHOW SONAME STATUS
STRAIGHT_JOIN SET SEQUENCE TABLES TEMINATED TRUNCATE TEMPORARY TRIGGER TRUSTED UNLOCK
USE UPDATE UNSIGNED VALUES VARIABLES VIEW VACUUM WRITE ZEROFILL XOR
ABORT ABSOLUTE ACCESS ACTION ADMIN AFTER AGGREGATE ALSO ALWAYS ASSERTION ASSIGNMENT AT ATTRIBUTE
BACKWARD BEFORE BIGINT CACHE CALLED CASCADE CASCADED CATALOG CHAIN CHARACTER CHARACTERISTICS
CHECKPOINT CLOSE COMMENT COMMENTS COMMITTED CONFIGURATION CONNECTION CONSTRAINTS CONTENT
CONTINUE CONVERSION COST CSV CURRENT CURSOR CYCLE DAY DEALLOCATE DEC DECIMAL DECLARE DEFAULTS
DEFERRED DEFINER DELIMITER DELIMITERS DICTIONARY DISABLE DISCARD DOCUMENT DOMAIN DOUBLE EACH
ENABLE ENCODING ENCRYPTED ENUM ESCAPE EXCLUDE EXCLUDING EXCLUSIVE EXECUTE EXTENSION EXTERNAL
FIRST FLOAT FOLLOWING FORCE FORWARD FUNCTIONS GLOBAL GRANTED HANDLER HEADER HOLD
HOUR IDENTITY IMMEDIATE IMMUTABLE IMPLICIT INCLUDING INCREMENT INDEXES INHERITS INLINE INOUT INPUT
INSENSITIVE INSTEAD INT INTEGER INVOKER ISOLATION LABEL LARGE LAST LC_COLLATE LC_CTYPE
LEAKPROOF LEVEL LISTEN LOCATION LOOP MAPPING MATCH MAXVALUE MINUTE MINVALUE MODE MONTH MOVE NAMES
NATIONAL NCHAR NEXT NO NONE NOTHING NOTIFY NOWAIT NULLS OBJECT OF OFF OIDS OPERATOR OPTIONS
OUT OWNED PARSER PARTIAL PARTITION PASSING PASSWORD PLANS PRECEDING PRECISION PREPARE
PREPARED PRESERVE PRIOR PRIVILEGES QUOTE RANGE REAL REASSIGN RECHECK RECURSIVE REF REINDEX RELATIVE
RELEASE REPEATABLE REPLICA RESET RESTART RESTRICT RETURNS ROW ROWS RULE SAVEPOINT SCHEMA SCROLL SEARCH
SECOND SECURITY SEQUENCES SERIALIZABLE SERVER SESSION SETOF SHARE SIMPLE SMALLINT SNAPSHOT STABLE
STANDALONE START STATEMENT STATISTICS STORAGE STRICT SYSID SYSTEM TABLESPACE TEMP
TEMPLATE TRANSACTION TREAT TYPE TYPES UNBOUNDED UNCOMMITTED UNENCRYPTED
UNKNOWN UNLISTEN UNLOGGED UNTIL VALID VALIDATE VALIDATOR VALUE VARYING VOLATILE
WHITESPACE WITHOUT WORK WRAPPER XMLATTRIBUTES XMLCONCAT XMLELEMENT XMLEXISTS XMLFOREST XMLPARSE
XMLPI XMLROOT XMLSERIALIZE YEAR YES ZONE
);
foreach my $k (@pg_keywords) {
push(@KEYWORDS1, $k) if (!grep(/^$k$/i, @KEYWORDS1));
}
my @KEYWORDS2 = (
'ascii', 'age',
'bit_length', 'btrim',
'char_length', 'character_length', 'convert', 'chr', 'current_date', 'current_time', 'current_timestamp', 'count',
'decode', 'date_part', 'date_trunc',
'encode', 'extract',
'get_byte', 'get_bit',
'initcap', 'isfinite', 'interval',
'justify_hours', 'justify_days',
'lower', 'length', 'lpad', 'ltrim', 'localtime', 'localtimestamp',
'md5',
'now',
'octet_length', 'overlay',
'position', 'pg_client_encoding',
'quote_ident', 'quote_literal',
'repeat', 'replace', 'rpad', 'rtrim',
'substring', 'split_part', 'strpos', 'substr', 'set_byte', 'set_bit',
'trim', 'to_ascii', 'to_hex', 'translate', 'to_char', 'to_date', 'to_timestamp', 'to_number', 'timeofday',
'upper',
);
my @KEYWORDS3 = ('STDIN', 'STDOUT');
my %SYMBOLS = (
'=' => '=', '<' => '<', '>' => '>', '\|' => '|', ',' => ',', '\.' => '.', '\+' => '+', '\-' => '-', '\*' => '*',
'\/' => '/', '!=' => '!='
);
my @BRACKETS = ('(', ')');
map {$_ = quotemeta($_)} @BRACKETS;
# Where statistics are stored
my %overall_stat = ();
my @top_slowest = ();
my %normalyzed_info = ();
my %error_info = ();
my %logs_type = ();
my %per_hour_info = ();
my %per_minute_info = ();
my %lock_info = ();
my %tempfile_info = ();
my %connection_info = ();
my %database_info = ();
my %application_info = ();
my %session_info = ();
my %conn_received = ();
my %checkpoint_info = ();
my %restartpoint_info = ();
my %autovacuum_info = ();
my %autoanalyze_info = ();
my @graph_values = ();
my %cur_info = ();
my %cur_temp_info = ();
my %cur_lock_info = ();
my $nlines = 0;
my %last_line = ();
our %saved_last_line = ();
my %tsung_session = ();
my @top_locked_info = ();
my @top_tempfile_info = ();
my $t0 = Benchmark->new;
# Reading last line parsed
if ($last_parsed && -e $last_parsed) {
if (open(IN, "$last_parsed")) {
my $line = <IN>;
close(IN);
($saved_last_line{datetime}, $saved_last_line{orig}) = split(/\t/, $line, 2);
} else {
die "FATAL: can't read last parsed line from $last_parsed, $!\n";
}
}
$tmp_last_parsed = 'tmp_' . $last_parsed if ($last_parsed);
# Main loop reading log files
my $global_totalsize = 0;
my @given_log_files = ( @log_files );
# log files must be erased when loading stats from binary format
if ($format eq 'binary') {
$queue_size = 1;
$job_per_file = 1;
@log_files = ();
}
my $pipe;
# Start parsing all given files using multiprocess
if ( ($queue_size > 1) || ($job_per_file > 1) ) {
# Number of running processes
my $child_count = 0;
# Set max number of parallel processes
my $parallel_process = $queue_size;
if ($job_per_file > 1) {
$parallel_process = $job_per_file;
}
# Store total size of the log files
foreach my $logfile ( @given_log_files ) {
$global_totalsize += &get_log_file($logfile);
}
# Open a pipe for interprocess communication
my $reader = new IO::Handle;
my $writer = new IO::Handle;
$pipe = IO::Pipe->new($reader, $writer);
$writer->autoflush(1);
# Fork the logger process
if ($progress) {
spawn sub {
&multiprocess_progressbar($global_totalsize);
};
}
# Parse each log file following the multiprocess mode chosen (-j or -J)
foreach my $logfile ( @given_log_files ) {
while ($child_count >= $parallel_process) {
my $kid = waitpid(-1, WNOHANG);
if ($kid > 0) {
$child_count--;
delete $RUNNING_PIDS{$kid};
}
usleep(500000);
}
# Do not use split method with compressed files
if ( ($queue_size > 1) && ($logfile !~ /\.(gz|bz2|zip)/i) ) {
# Create multiple processes to parse one log file by chunks of data
my @chunks = &split_logfile($logfile);
for (my $i = 0; $i < $#chunks; $i++) {
while ($child_count >= $parallel_process) {
my $kid = waitpid(-1, WNOHANG);
if ($kid > 0) {
$child_count--;
delete $RUNNING_PIDS{$kid};
}
usleep(500000);
}
push(@tempfiles, [ tempfile('tmp_pgbadgerXXXX', SUFFIX => '.bin', DIR => $TMP_DIR, UNLINK => 1 ) ]);
spawn sub {
&process_file($logfile, $tempfiles[-1]->[0], $chunks[$i], $chunks[$i+1]);
};
$child_count++;
}
} else {
# Start parsing one file per parallel process
push(@tempfiles, [ tempfile('tmp_pgbadgerXXXX', SUFFIX => '.bin', DIR => $TMP_DIR, UNLINK => 1 ) ]);
spawn sub {
&process_file($logfile, $tempfiles[-1]->[0]);
};
$child_count++;
}
last if ($interrupt);
}
my $minproc = 1;
$minproc = 0 if (!$progress);
# Wait for all children to die, except the logger
while (scalar keys %RUNNING_PIDS > $minproc) {
my $kid = waitpid(-1, WNOHANG);
if ($kid > 0) {
delete $RUNNING_PIDS{$kid};
}
usleep(500000);
}
# Terminate the process logger
foreach my $k (keys %RUNNING_PIDS) {
kill(10, $k);
%RUNNING_PIDS = ();
}
# Load all data gathered by all the different processes
&init_stats_vars();
foreach my $f (@tempfiles) {
next if (!-e "$f->[1]" || -z "$f->[1]");
my $fht = new IO::File;
$fht->open("< $f->[1]") or die "FATAL: can't open file $f->[1], $!\n";
&load_stats($fht);
$fht->close();
}
# Get last line parsed from all processes
if ($last_parsed) {
if (open(IN, "$tmp_last_parsed") ) {
while (my $line = <IN>) {
chomp($line);
my ($d, $l) = split(/\t/, $line, 2);
if (!$last_line{datetime} || ($d gt $last_line{datetime})) {
$last_line{datetime} = $d;
$last_line{orig} = $l;
}
}
close(IN);
}
unlink("$tmp_last_parsed");
}
} else {
# Multiprocessing disabled, parse log files one by one
foreach my $logfile ( @given_log_files ) {
last if (&process_file($logfile));
}
}
# Save last line parsed
if ($last_parsed && scalar keys %last_line) {
if (open(OUT, ">$last_parsed")) {
print OUT "$last_line{datetime}\t$last_line{orig}\n";
close(OUT);
} else {
&logmsg('ERROR', "can't save last parsed line into $last_parsed, $!");
}
}
my $t1 = Benchmark->new;
my $td = timediff($t1, $t0);
&logmsg('DEBUG', "the log statistics gathering took:" . timestr($td));
&logmsg('LOG', "Ok, generating $extension report...");
# Open filehandle
my $fh = undef;
if ($extension ne 'tsung') {
$fh = new IO::File ">$outfile";
if (not defined $fh) {
die "FATAL: can't write to $outfile, $!\n";
}
if (($extension eq 'text') || ($extension eq 'txt')) {
if ($error_only) {
&dump_error_as_text();
} else {
&dump_as_text();
}
} elsif ($extension eq 'binary') {
&dump_as_binary($fh);
} else {
# Create instance to prettify SQL query
if (!$noprettify) {
$sql_prettified = SQL::Beautify->new(keywords => \@pg_keywords);
}
if ($error_only) {
&dump_error_as_html();
} else {
&dump_as_html();
}
}
$fh->close;
} else {
# Open filehandle
$fh = new IO::File ">>$outfile";
if (not defined $fh) {
die "FATAL: can't write to $outfile, $!\n";
}
print $fh "\n";
$fh->close();
}
my $t2 = Benchmark->new;
$td = timediff($t2, $t1);
&logmsg('DEBUG', "building reports took:" . timestr($td));
$td = timediff($t2, $t0);
&logmsg('DEBUG', "the total execution time took:" . timestr($td));
exit 0;
#-------------------------------------------------------------------------------
# Show pgBadger command line usage
sub usage
{
print qq{
Usage: pgbadger [options] logfile [...]
PostgreSQL log analyzer with fully detailed reports and graphs.
Arguments:
logfile can be a single log file, a list of files, or a shell command
returning a list of files. If you want to pass log content from stdin
use - as filename. Note that input from stdin will not work with csvlog.
Options:
-a | --average minutes : number of minutes to build the average graphs of
queries and connections.
-b | --begin datetime : start date/time for the data to be parsed in log.
-c | --dbclient host : only report on entries for the given client host.
-C | --nocomment : remove comments like /* ... */ from queries.
-d | --dbname database : only report on entries for the given database.
-e | --end datetime : end date/time for the data to be parsed in log.
-f | --format logtype : possible values: syslog,stderr,csv. Default: stderr.
-G | --nograph : disable graphs on HTML output. Enabled by default.
-h | --help : show this message and exit.
-i | --ident name : programname used as syslog ident. Default: postgres
-j | --jobs number : number of jobs to run at the same time. Default is 1,
run as single process.
-l | --last-parsed file: allow incremental log parsing by registering the
last datetime and line parsed. Useful if you want
to watch errors since last run or if you want one
report per day with a log rotated each week.
-m | --maxlength size : maximum length of a query, it will be restricted to
the given size. Default: no truncate
-n | --nohighlight : disable SQL code highlighting.
-N | --appname name : only report on entries for given application name
-o | --outfile filename: define the filename for the output. Default depends
on the output format: out.html, out.txt or out.tsung.
To dump output to stdout use - as filename.
-p | --prefix string : give here the value of your custom log_line_prefix
defined in your postgresql.conf. Only use it if you
aren't using one of the standard prefixes specified
in the pgBadger documentation, such as if your prefix
includes additional variables like client ip or
application name. See examples below.
-P | --no-prettify : disable the SQL query prettify formatter.
-q | --quiet : don't print anything to stdout, not even a progress bar.
-s | --sample number : number of query samples to store/display. Default: 3
-S | --select-only : use it if you want to report select queries only.
-t | --top number : number of queries to store/display. Default: 20
-T | --title string : change title of the HTML page report.
-u | --dbuser username : only report on entries for the given user.
-U | --exclude-user username : exclude entries for the specified user from report.
-v | --verbose : enable verbose or debug mode. Disabled by default.
-V | --version : show pgBadger version and exit.
-w | --watch-mode : only report errors just like logwatch could do.
-x | --extension : output format. Values: text, html or tsung. Default: html
-z | --zcat exec_path : set the full path to the zcat program. Use it if
zcat or bzcat or unzip is not on your path.
--pie-limit num : pie data lower than num% will show a sum instead.
--exclude-query regex : any query matching the given regex will be excluded
from the report. For example: "^(VACUUM|COMMIT)"
You can use this option multiple times.
--exclude-file filename: path of the file which contains all the regex to use
to exclude queries from the report. One regex per line.
--include-query regex : any query that does not match the given regex will be
excluded from the report. For example: "(table_1|table_2)"
You can use this option multiple times.
--include-file filename: path of the file which contains all the regex of the
queries to include in the report. One regex per line.
--disable-error : do not generate error report.
--disable-hourly : do not generate hourly report.
--disable-type : do not generate query type report.
--disable-query : do not generate query reports (slowest, most
frequent, ...).
--disable-session : do not generate session report.
--disable-connection : do not generate connection report.
--disable-lock : do not generate lock report.
--disable-temporary : do not generate temporary report.
--disable-checkpoint : do not generate checkpoint/restartpoint report.
--disable-autovacuum : do not generate autovacuum report.
Examples:
pgbadger /var/log/postgresql.log
pgbadger /var/log/postgres.log.2.gz /var/log/postgres.log.1.gz \
/var/log/postgres.log
pgbadger /var/log/postgresql/postgresql-2012-05-*
pgbadger --exclude-query="^(COPY|COMMIT)" /var/log/postgresql.log
pgbadger -b "2012-06-25 10:56:11" -e "2012-06-25 10:59:11" \
/var/log/postgresql.log
cat /var/log/postgres.log | pgbadger -
# log prefix with stderr log output
perl pgbadger --prefix '%t [%p]: [%l-1] user=%u,db=%d,client=%h' \
/pglog/postgresql-2012-08-21*
perl pgbadger --prefix '%m %u@%d %p %r %a : ' /pglog/postgresql.log
# Log line prefix with syslog log output
perl pgbadger --prefix 'user=%u,db=%d,client=%h,appname=%a' \
/pglog/postgresql-2012-08-21*
# Use my 8 CPUs to parse my 10GB file faster, really faster
perl pgbadger -j 8 /pglog/postgresql-9.1-main.log
Generate Tsung sessions XML file with select queries only:
perl pgbadger -S -o sessions.tsung --prefix '%t [%p]: [%l-1] user=%u,db=%d ' /pglog/postgresql-9.1.log
Reporting errors every week by cron job:
30 23 * * 1 /usr/bin/pgbadger -q -w /var/log/postgresql.log -o /var/reports/pg_errors.html
Generate report every week using incremental behavior:
0 4 * * 1 /usr/bin/pgbadger -q `find /var/log/ -mtime -7 -name "postgresql.log*"` \
-o /var/reports/pg_errors-`date +%F`.html -l /var/reports/pgbadger_incremental_file.dat
This assumes that your log file and HTML report are also rotated every week.
};
exit 0;
}
sub init_stats_vars
{
# Empty where statistics are stored
%overall_stat = ();
@top_slowest = ();
%normalyzed_info = ();
%error_info = ();
%logs_type = ();
%per_hour_info = ();
%per_minute_info = ();
%lock_info = ();
%tempfile_info = ();
%connection_info = ();
%database_info = ();
%application_info = ();
%session_info = ();
%conn_received = ();
%checkpoint_info = ();
%restartpoint_info = ();
%autovacuum_info = ();
%autoanalyze_info = ();
@graph_values = ();
%cur_info = ();
$nlines = 0;
%tsung_session = ();
}
####
# Progress bar writer process used when parsing in multiprocess mode
####
sub multiprocess_progressbar
{
my $totalsize = shift;
&logmsg('DEBUG', "Starting progressbar writer process");
$0 = 'pgbadger logger';
# Terminate the process when we haven't read the complete file but must exit
local $SIG{USR1} = sub {
print STDERR "\n";
exit 0;
};
my $timeout = 3;
my $cursize = 0;
my $nqueries = 0;
my $nerrors = 0;
$pipe->reader();
while (my $r = <$pipe>) {
chomp($r);
my @infos = split(/\s+/, $r);
$cursize += $infos[0];
$nqueries += $infos[1];
$nerrors += $infos[2];
$cursize = $totalsize if ($cursize > $totalsize);
print STDERR &progress_bar($cursize, $totalsize, 25, '=', $nqueries, $nerrors);
last if ($cursize >= $totalsize);
}
print STDERR "\n";
exit 0;
}
####
# Main function called for each parser process
####
sub process_file
{
my ($logfile, $tmpoutfile, $start_offset, $stop_offset) = @_;
my $old_queries_count = 0;
my $old_errors_count = 0;
my $current_offset = $start_offset || 0;
my $getout = 0;
$0 = 'pgbadger parser';
&init_stats_vars() if ($tmpoutfile);
&logmsg('DEBUG', "Starting to parse log file: $logfile");
my $terminate = 0;
local $SIG{INT} = sub { $terminate = 1 };
local $SIG{TERM} = sub { $terminate = 1 };
my $curdate = localtime(time);
$pipe->writer() if (defined $pipe);
# Syslog does not have year information, so take care of year overlapping
my ($gsec, $gmin, $ghour, $gmday, $gmon, $gyear, $gwday, $gyday, $gisdst) = localtime(time);
$gyear += 1900;
my $CURRENT_DATE = $gyear . sprintf("%02d", $gmon + 1) . sprintf("%02d", $gmday);
my $cursize = 0;
# Get file handle and size of the file
my ($lfile, $totalsize) = &get_log_file($logfile);
if ($stop_offset > 0) {
$totalsize = $stop_offset - $start_offset;
}
&logmsg('DEBUG', "Starting reading file $logfile...");
if ($format eq 'csv') {
require Text::CSV_XS;
my $csv = Text::CSV_XS->new({binary => 1, eol => $/});
# Parse csvlog lines
while (my $row = $csv->getline($lfile)) {
# We received a signal
last if ($terminate);
# Set progress statistics
$cursize += length(join(',', @$row));
$nlines++;
if (!$tmpoutfile) {
if ($progress && (($nlines % $NUMPROGRESS) == 0)) {
if ($totalsize) {
print STDERR &progress_bar($cursize, $totalsize, 25, '=');
} else {
print STDERR ".";
}
}
} else {
if ($progress && (($nlines % $NUMPROGRESS) == 0)) {
$pipe->print("$cursize " . ($overall_stat{'queries_number'} - $old_queries_count) . " " . ($overall_stat{'errors_number'} - $old_errors_count) . "\n");
$old_queries_count = $overall_stat{'queries_number'};
$old_errors_count = $overall_stat{'errors_number'};
$cursize = 0;
}
}
# Process only relevant lines
next if ($row->[11] !~ /^(LOG|WARNING|ERROR|FATAL|PANIC|DETAIL|STATEMENT|HINT|CONTEXT)$/);
# Extract the date
$row->[0] =~ m/^(\d+)-(\d+)-(\d+)\s+(\d+):(\d+):(\d+)\.(\d+)/;
my $milli = $7 || 0;
($prefix_vars{'t_year'}, $prefix_vars{'t_month'}, $prefix_vars{'t_day'}, $prefix_vars{'t_hour'}, $prefix_vars{'t_min'}, $prefix_vars{'t_sec'}) = ($1, $2, $3, $4, $5, $6);
$prefix_vars{'t_timestamp'} = "$1-$2-$3 $4:$5:$6";
# Skip unwanted lines
next if ($from && ($from gt $prefix_vars{'t_timestamp'}));
if ($to && ($to lt $prefix_vars{'t_timestamp'})) {
if ($tmpoutfile) {
$pipe->print("$cursize " . ($overall_stat{'queries_number'} - $old_queries_count) . " " . ($overall_stat{'errors_number'} - $old_errors_count) . "\n");
$old_queries_count = $overall_stat{'queries_number'};
$old_errors_count = $overall_stat{'errors_number'};
$cursize = 0;
}
$getout = 1;
last;
}
# Jump to the last line parsed if required
next if (!&check_incremental_position($prefix_vars{'t_timestamp'}, join(',', @$row)));
# Store the current timestamp of the log line
&store_current_timestamp($prefix_vars{'t_timestamp'});
# Set query parameters as global variables
$prefix_vars{'t_dbuser'} = $row->[1] || '';
$prefix_vars{'t_dbname'} = $row->[2] || '';
$prefix_vars{'t_appname'} = $row->[22] || '';
$prefix_vars{'t_client'} = $row->[4] || '';
$prefix_vars{'t_client'} =~ s/:.*//;
$prefix_vars{'t_host'} = 'csv';
$prefix_vars{'t_pid'} = $row->[3];
$prefix_vars{'t_session_line'} = $row->[5];
$prefix_vars{'t_session_line'} =~ s/\..*//;
$prefix_vars{'t_loglevel'} = $row->[11];
$prefix_vars{'t_query'} = $row->[13];
# Set additional ERROR information
$prefix_vars{'t_detail'} = $row->[14];
$prefix_vars{'t_hint'} = $row->[15];
$prefix_vars{'t_context'} = $row->[18];
$prefix_vars{'t_statement'} = $row->[19];
# Check if the log line should be excluded from the report
if (&validate_log_line($prefix_vars{'t_pid'})) {
# Parse the query now
&parse_query();
&store_queries($prefix_vars{'t_pid'});
delete $cur_info{$prefix_vars{'t_pid'}};
}
}
if (!$getout) {
$csv->eof or warn "FATAL: cannot use CSV, " . $csv->error_diag() . "\n";
}
}
elsif ($format eq 'binary') {
&load_stats($lfile);
}
else { # Format is not CSV.
my $time_pattern = qr/(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})/;
my $cur_pid = '';
my @matches = ();
my $goon = 0;
if ($start_offset) {
$lfile->seek($start_offset, 0);
}
while (my $line = <$lfile>) {
# We received a signal
last if ($terminate);
$cursize += length($line);
$current_offset += length($line);
chomp($line);
$line =~ s/\r//;
$nlines++;
next if (!$line);
if (!$tmpoutfile) {
if ($progress && (($nlines % $NUMPROGRESS) == 0)) {
if ($totalsize) {
if ($stop_offset > 0) {
print STDERR &progress_bar($cursize - $start_offset, $stop_offset, 25, '=');
} else {
print STDERR &progress_bar($cursize, $totalsize, 25, '=');
}
} else {
print STDERR ".";
}
}
} else {
if ($progress && (($nlines % $NUMPROGRESS) == 0)) {
$pipe->print("$cursize " . ($overall_stat{'queries_number'} - $old_queries_count) . " " . ($overall_stat{'errors_number'} - $old_errors_count) . "\n");
$old_queries_count = $overall_stat{'queries_number'};
$old_errors_count = $overall_stat{'errors_number'};
$cursize = 0;
}
}
%prefix_vars = ();
# Parse syslog lines
if ($format =~ /syslog/) {
@matches = ($line =~ $compiled_prefix);
if ($#matches >= 0) {
for (my $i = 0 ; $i <= $#prefix_params ; $i++) {
$prefix_vars{$prefix_params[$i]} = $matches[$i];
}
# skip non postgresql lines
next if ($prefix_vars{'t_ident'} ne $ident);
# Standard syslog format does not have year information, months are
# three letters and days do not always have 2 digits.
if ($prefix_vars{'t_month'} !~ /\d/) {
$prefix_vars{'t_year'} = $gyear;
$prefix_vars{'t_day'} = sprintf("%02d", $prefix_vars{'t_day'});
$prefix_vars{'t_month'} = $month_abbr{$prefix_vars{'t_month'}};
# Take care of year overlapping
if ("$prefix_vars{'t_year'}$prefix_vars{'t_month'}$prefix_vars{'t_day'}" > $CURRENT_DATE) {
$prefix_vars{'t_year'} = substr($CURRENT_DATE, 0, 4) - 1;
}
}
$prefix_vars{'t_timestamp'} =
"$prefix_vars{'t_year'}-$prefix_vars{'t_month'}-$prefix_vars{'t_day'} $prefix_vars{'t_hour'}:$prefix_vars{'t_min'}:$prefix_vars{'t_sec'}";
# Skip unwanted lines
next if ($from && ($from gt $prefix_vars{'t_timestamp'}));
if ($to && ($to lt $prefix_vars{'t_timestamp'})) {
if ($tmpoutfile) {
$pipe->print("$cursize " . ($overall_stat{'queries_number'} - $old_queries_count) . " " . ($overall_stat{'errors_number'} - $old_errors_count) . "\n");
$old_queries_count = $overall_stat{'queries_number'};
$old_errors_count = $overall_stat{'errors_number'};
$cursize = 0;
}
$getout = 1;
last;
}
# Jump to the last line parsed if required
next if (!&check_incremental_position($prefix_vars{'t_timestamp'}, $line));
$cur_pid = $prefix_vars{'t_pid'};
$goon = 1;
# Store the current timestamp of the log line
&store_current_timestamp($prefix_vars{'t_timestamp'});
# Extract information from log line prefix
if (!$log_line_prefix) {
&parse_log_prefix($prefix_vars{'t_logprefix'});
}
# Check if the log line should be excluded from the report
if (&validate_log_line($prefix_vars{'t_pid'})) {
# Process the log line
&parse_query();
}
} elsif ($goon && ($line =~ $other_syslog_line)) {
$cur_pid = $8;
my $t_query = $10;
$t_query = $11 if ($format eq 'syslog-ng');
$t_query =~ s/#011/\t/g;
next if ($t_query eq "\t");
if ($cur_info{$cur_pid}{vacuum} && ($t_query =~ /^\t(pages|tuples|buffer usage|avg read rate|system usage):/)) {
if ($t_query =~ /^\t(pages|tuples): (\d+) removed, (\d+) remain/) {
$autovacuum_info{tables}{$cur_info{$cur_pid}{vacuum}}{$1}{removed} += $2;
}
next;
} elsif ( $cur_info{$cur_pid}{parameters} && (($t_query =~ /[,\s]*\$(\d+)\s=\s/) || ($t_query =~ /^('[^']*')$/)) ) {
# stores bind parameters if any
$cur_info{$cur_pid}{parameters} .= " $t_query";
next;
}
if ($cur_info{$cur_pid}{statement}) {
$cur_info{$cur_pid}{statement} .= "\n" . $t_query;
} elsif ($cur_info{$cur_pid}{context}) {
$cur_info{$cur_pid}{context} .= "\n" . $t_query;
} elsif ($cur_info{$cur_pid}{detail}) {
$cur_info{$cur_pid}{detail} .= "\n" . $t_query;
} else {
$cur_info{$cur_pid}{query} .= "\n" . $t_query;
}
# Collect orphan lines of multi-line queries
} elsif ($cur_pid && ($line !~ $orphan_syslog_line)) {
if ($cur_info{$cur_pid}{statement}) {
$cur_info{$cur_pid}{statement} .= "\n" . $line;
} elsif ($cur_info{$cur_pid}{context}) {
$cur_info{$cur_pid}{context} .= "\n" . $line;
} elsif ($cur_info{$cur_pid}{detail}) {
$cur_info{$cur_pid}{detail} .= "\n" . $line;
} else {
$cur_info{$cur_pid}{query} .= "\n" . $line;
}
} else {
&logmsg('DEBUG', "Unknown syslog line format: $line");
}
} elsif ($format eq 'stderr') {
@matches = ($line =~ $compiled_prefix);
if ($#matches >= 0) {
for (my $i = 0 ; $i <= $#prefix_params ; $i++) {
$prefix_vars{$prefix_params[$i]} = $matches[$i];
}
if (!$prefix_vars{'t_timestamp'} && $prefix_vars{'t_mtimestamp'}) {
$prefix_vars{'t_timestamp'} = $prefix_vars{'t_mtimestamp'};
} elsif (!$prefix_vars{'t_timestamp'} && $prefix_vars{'t_session_timestamp'}) {
$prefix_vars{'t_timestamp'} = $prefix_vars{'t_session_timestamp'};
}
($prefix_vars{'t_year'}, $prefix_vars{'t_month'}, $prefix_vars{'t_day'}, $prefix_vars{'t_hour'},
$prefix_vars{'t_min'}, $prefix_vars{'t_sec'}) = ($prefix_vars{'t_timestamp'} =~ $time_pattern);
# Skip unwanted lines
next if ($from && ($from gt $prefix_vars{'t_timestamp'}));
if ($to && ($to lt $prefix_vars{'t_timestamp'})) {
if ($tmpoutfile) {
$pipe->print("$cursize " . ($overall_stat{'queries_number'} - $old_queries_count) . " " . ($overall_stat{'errors_number'} - $old_errors_count) . "\n");
$old_queries_count = $overall_stat{'queries_number'};
$old_errors_count = $overall_stat{'errors_number'};
$cursize = 0;
}
$getout = 1;
last;
}
# Jump to the last line parsed if required
next if (!&check_incremental_position($prefix_vars{'t_timestamp'}, $line));
$cur_pid = $prefix_vars{'t_pid'};
# Store the current timestamp of the log line
&store_current_timestamp($prefix_vars{'t_timestamp'});
# Extract information from log line prefix
if (!$log_line_prefix) {
&parse_log_prefix($prefix_vars{'t_logprefix'});
}
# Check if the log line should be excluded from the report
if (&validate_log_line($prefix_vars{'t_pid'})) {
$prefix_vars{'t_host'} = 'stderr';
# Process the log line
&parse_query();
}
# Collect additional query information
} elsif ($cur_pid && ($line !~ $orphan_stderr_line)) {
if ($cur_info{$cur_pid}{vacuum} && ($line =~ /^\t(pages|tuples|buffer usage|avg read rate|system usage):/)) {
if ($line =~ /^\t(pages|tuples): (\d+) removed, (\d+) remain/) {
$autovacuum_info{tables}{$cur_info{$cur_pid}{vacuum}}{$1}{removed} += $2;
}
next;
} elsif ( $cur_info{$cur_pid}{parameters} && (($line =~ /[,\s]*\$(\d+)\s=\s/) || ($line =~ /^'[^']*'$/)) ) {
# stores bind parameters if any
$cur_info{$cur_pid}{parameters} .= " $line";
next;
}
if (exists $cur_info{$cur_pid}{statement}) {
$cur_info{$cur_pid}{statement} .= "\n" . $line;
} elsif (exists $cur_info{$cur_pid}{context}) {
$cur_info{$cur_pid}{context} .= "\n" . $line;
} elsif (exists $cur_info{$cur_pid}{detail}) {
$cur_info{$cur_pid}{detail} .= "\n" . $line;
} else {
$cur_info{$cur_pid}{query} .= "\n" . $line;
}
# Collect orphan lines of multi-line queries
} elsif ($cur_pid && ($cur_info{$cur_pid}{query})) {
$cur_info{$cur_pid}{detail} .= "\n" . $line;
}
} else {
# unknown format
&logmsg('DEBUG', "Unknown line format: $line");
}
last if (($stop_offset > 0) && ($current_offset > $stop_offset));
}
}
close $lfile;
# Get stats from all pending temporary storage
foreach my $pid (sort {$cur_info{$a}{date} <=> $cur_info{$b}{date}} keys %cur_info) {
&store_queries($pid);
}
if ($extension eq 'tsung') {
foreach my $pid (sort {$a <=> $b} keys %tsung_session) {
&store_tsung_session($pid);
}
}
if ($progress && !$getout) {
if (!$tmpoutfile) {
if ($totalsize) {
if (($stop_offset > 0) && ($format ne 'csv')) {
print STDERR &progress_bar($cursize - $start_offset, $stop_offset, 25, '=',$overall_stat{'queries_number'},$overall_stat{'errors_number'});
} elsif ($extension eq 'tsung') {
print STDERR &progress_bar($cursize, $totalsize, 25, '=', $logfile);
} else {
print STDERR &progress_bar($cursize, $totalsize, 25, '=', $overall_stat{'queries_number'},$overall_stat{'errors_number'});
}
print STDERR "\n";
}
} else {
$pipe->print("$cursize " . ($overall_stat{'queries_number'} - $old_queries_count) . " " . ($overall_stat{'errors_number'} - $old_errors_count) . "\n");
}
}
%cur_info = ();
if ($tmpoutfile) {
&dump_as_binary($tmpoutfile);
$tmpoutfile->close();
}
# Inform the parent that it should stop parsing other files
if ($getout) {
kill(12, $parent_pid);
}
# Save last line into temporary file
if ($last_parsed && scalar keys %last_line) {
if (open(OUT, ">>$tmp_last_parsed")) {
flock(OUT, 2) || return $getout;
print OUT "$last_line{datetime}\t$last_line{orig}\n";
close(OUT);
} else {
&logmsg('ERROR', "can't save last parsed line into $last_parsed, $!");
}
}
return $getout;
}
# Store the current timestamp of the log line
sub store_current_timestamp
{
my $t_timestamp = shift;
$prefix_vars{'t_date'} = $t_timestamp;
$prefix_vars{'t_date'} =~ s/\D+//g;
if (!$overall_stat{'first_log_ts'} || ($overall_stat{'first_log_ts'} gt $t_timestamp)) {
$overall_stat{'first_log_ts'} = $t_timestamp;
}
if (!$overall_stat{'last_log_ts'} || ($overall_stat{'last_log_ts'} lt $t_timestamp)) {
$overall_stat{'last_log_ts'} = $t_timestamp;
}
}
# Method used to check if we have already reached the last parsing position in incremental mode.
# This position should have been saved in the incremental file and read from the $last_parsed
# file at start up.
sub check_incremental_position
{
my ($cur_date, $line) = @_;
if ($last_parsed) {
if ($saved_last_line{datetime}) {
if ($cur_date lt $saved_last_line{datetime}) {
return 0;
} elsif (!$last_line{datetime} && ($cur_date eq $saved_last_line{datetime})) {
return 0 if ($line ne $saved_last_line{orig});
}
}
$last_line{datetime} = $cur_date;
$last_line{orig} = $line;
}
return 1;
}
# Display message following the log level
sub logmsg
{
my ($level, $str) = @_;
return if ($quiet && ($level ne 'FATAL'));
return if (!$debug && ($level eq 'DEBUG'));
if ($level =~ /(\d+)/) {
print STDERR "\t" x $1;
}
print STDERR "$level: $str\n";
}
# Normalize SQL queries by removing parameters
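# Example (illustrative): "SELECT * FROM t WHERE id = 42 AND name = 'bob'"
# is normalized to "select * from t where id = 0 and name = ''" so that
# queries differing only in their constants are counted together.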
sub normalize_query
{
my $orig_query = shift;
return if (!$orig_query);
# Remove comments
$orig_query =~ s/\/\*(.*?)\*\///gs;
$orig_query = lc($orig_query);
# Replace runs of spaces, newlines and tabs with a single space
$orig_query =~ s/[\t\s\r\n]+/ /gs;
# Remove string content
$orig_query =~ s/\\'//g;
$orig_query =~ s/'[^']*'/''/g;
$orig_query =~ s/''('')+/''/g;
# Remove NULL parameters
$orig_query =~ s/=\s*NULL/=''/g;
# Remove numbers
$orig_query =~ s/([^a-z_\$-])-?([0-9]+)/${1}0/g;
# Remove hexadecimal numbers
$orig_query =~ s/([^a-z_\$-])0x[0-9a-f]{1,10}/${1}0x/g;
# Remove IN values
$orig_query =~ s/in\s*\([\'0x,\s]*\)/in (...)/g;
return $orig_query;
}
# Format numbers with comma for better reading
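# Example (illustrative): 1234567 becomes "1,234,567" (or "1 234 567" when the
# locale uses a comma as decimal separator, see $num_sep above).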
sub comma_numbers
{
return 0 if ($#_ < 0);
my $text = reverse $_[0];
$text =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1$num_sep/g;
return scalar reverse $text;
}
# Format duration
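# Example (illustrative): a duration of 3723456 milliseconds is rendered
# as "1h2m3.456s"; components equal to zero are omitted.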
sub convert_time
{
my $time = shift;
return '0s' if (!$time);
my $days = int($time / 86400000);
$time -= ($days * 86400000);
my $hours = int($time / 3600000);
$time -= ($hours * 3600000);
my $minutes = int($time / 60000);
$time -= ($minutes * 60000);
my $seconds = sprintf("%0.3f", $time / 1000);
$days = $days < 1 ? '' : $days . 'd';
$hours = $hours < 1 ? '' : $hours . 'h';
$minutes = $minutes < 1 ? '' : $minutes . 'm';
$time = $days . $hours . $minutes . $seconds . 's';
return $time;
}
# Stores the top N queries generating the biggest temporary file
sub set_top_tempfile_info
{
my ($q, $sz, $date, $db, $user, $remote, $app) = @_;
push(@top_tempfile_info, [($sz, $date, $q, $db, $user, $remote, $app)]);
my @tmp_top_tempfile_info = sort {$b->[0] <=> $a->[0]} @top_tempfile_info;
@top_tempfile_info = ();
for (my $i = 0; $i <= $#tmp_top_tempfile_info; $i++) {
push(@top_tempfile_info, $tmp_top_tempfile_info[$i]);
last if ($i == $end_top);
}
}
# Stores the top N queries waiting the most
sub set_top_locked_info
{
my ($q, $dt, $date, $db, $user, $remote, $app) = @_;
push(@top_locked_info, [($dt, $date, $q, $db, $user, $remote, $app)]);
my @tmp_top_locked_info = sort {$b->[0] <=> $a->[0]} @top_locked_info;
@top_locked_info = ();
for (my $i = 0; $i <= $#tmp_top_locked_info; $i++) {
push(@top_locked_info, $tmp_top_locked_info[$i]);
last if ($i == $end_top);
}
}
# Stores the top N slowest queries
sub set_top_slowest
{
my ($q, $dt, $date, $db, $user, $remote, $app) = @_;
push(@top_slowest, [($dt, $date, $q, $db, $user, $remote, $app)]);
my @tmp_top_slowest = sort {$b->[0] <=> $a->[0]} @top_slowest;
@top_slowest = ();
for (my $i = 0; $i <= $#tmp_top_slowest; $i++) {
push(@top_slowest, $tmp_top_slowest[$i]);
last if ($i == $end_top);
}
}
# Stores top N slowest sample queries
sub set_top_sample
{
my ($norm, $q, $dt, $date, $db, $user, $remote, $app) = @_;
$normalyzed_info{$norm}{samples}{$dt}{query} = $q;
$normalyzed_info{$norm}{samples}{$dt}{date} = $date;
$normalyzed_info{$norm}{samples}{$dt}{db} = $db;
$normalyzed_info{$norm}{samples}{$dt}{user} = $user;
$normalyzed_info{$norm}{samples}{$dt}{remote} = $remote;
$normalyzed_info{$norm}{samples}{$dt}{app} = $app;
my $i = 1;
foreach my $k (sort {$b <=> $a} keys %{$normalyzed_info{$norm}{samples}}) {
if ($i > $sample) {
delete $normalyzed_info{$norm}{samples}{$k};
}
$i++;
}
}
# Stores top N error sample queries
sub set_top_error_sample
{
my ($q, $date, $real_error, $detail, $context, $statement, $hint, $db) = @_;
# Stop when we have our number of samples
if (!exists $error_info{$q}{date} || ($#{$error_info{$q}{date}} < $sample)) {
if (($q =~ /deadlock detected/) || !grep(/\Q$real_error\E/, @{$error_info{$q}{error}})) {
push(@{$error_info{$q}{date}}, $date);
push(@{$error_info{$q}{detail}}, $detail);
push(@{$error_info{$q}{context}}, $context);
push(@{$error_info{$q}{statement}}, $statement);
push(@{$error_info{$q}{hint}}, $hint);
push(@{$error_info{$q}{error}}, $real_error);
push(@{$error_info{$q}{db}}, $db);
}
}
}
sub dump_as_text
{
# Global information
my $curdate = localtime(time);
my $fmt_nlines = &comma_numbers($nlines);
my $total_time = timestr($td);
$total_time =~ s/^([\.0-9]+) wallclock.*/$1/;
$total_time = &convert_time($total_time * 1000);
my $logfile_str = $log_files[0];
if ($#log_files > 0) {
$logfile_str .= ', ..., ' . $log_files[-1];
}
print $fh qq{
$report_title
- Global information ---------------------------------------------------
Generated on $curdate
Log file: $logfile_str
Parsed $fmt_nlines log entries in $total_time
Log start from $overall_stat{'first_log_ts'} to $overall_stat{'last_log_ts'}
};
# Overall statistics
my $fmt_unique = &comma_numbers(scalar keys %normalyzed_info) || 0;
my $fmt_queries = &comma_numbers($overall_stat{'queries_number'}) || 0;
my $fmt_duration = &convert_time($overall_stat{'queries_duration'}) || 0;
print $fh qq{
- Overall statistics ---------------------------------------------------
Number of unique normalized queries: $fmt_unique
Number of queries: $fmt_queries
Total query duration: $fmt_duration
First query: $overall_stat{'first_query_ts'}
Last query: $overall_stat{'last_query_ts'}
};
foreach (sort {$overall_stat{'query_peak'}{$b} <=> $overall_stat{'query_peak'}{$a}} keys %{$overall_stat{'query_peak'}}) {
print $fh "Query peak: ", &comma_numbers($overall_stat{'query_peak'}{$_}), " queries/s at $_";
last;
}
if (!$disable_error) {
my $fmt_errors = &comma_numbers($overall_stat{'errors_number'}) || 0;
my $fmt_unique_error = &comma_numbers(scalar keys %{$overall_stat{'unique_normalized_errors'}}) || 0;
print $fh qq{
Number of events: $fmt_errors
Number of unique normalized events: $fmt_unique_error
};
}
if ($tempfile_info{count}) {
my $fmt_temp_maxsize = &comma_numbers($tempfile_info{maxsize}) || 0;
my $fmt_temp_avsize = &comma_numbers(sprintf("%.2f", ($tempfile_info{size} / $tempfile_info{count})));
print $fh qq{Number of temporary files: $tempfile_info{count}
Max size of temporary files: $fmt_temp_maxsize
Average size of temporary files: $fmt_temp_avsize
};
}
if (!$disable_session && $session_info{count}) {
my $avg_session_duration = &convert_time($session_info{duration} / $session_info{count});
my $tot_session_duration = &convert_time($session_info{duration});
print $fh qq{Total number of sessions: $session_info{count}
Total duration of sessions: $tot_session_duration
Average duration of sessions: $avg_session_duration
};
}
if (!$disable_connection && $connection_info{count}) {
print $fh "Total number of connections: $connection_info{count}\n";
}
if (scalar keys %database_info > 1) {
print $fh "Total number of databases: ", scalar keys %database_info, "\n";
}
if (!$disable_hourly && $overall_stat{'queries_number'}) {
print $fh qq{
- Hourly statistics ----------------------------------------------------
Hourly statistics are not available in text output format
};
}
# SELECT/INSERT/UPDATE/DELETE distribution
my $totala = $overall_stat{'SELECT'} + $overall_stat{'INSERT'} + $overall_stat{'UPDATE'} + $overall_stat{'DELETE'};
if (!$disable_type && $totala) {
my $total = $overall_stat{'queries_number'} || 1;
print $fh "\n- Queries by type ------------------------------------------------------\n\n";
print $fh "Type Count Percentage\n";
print $fh "SELECT: ", &comma_numbers($overall_stat{'SELECT'}) || 0, " ",
sprintf("%0.2f", ($overall_stat{'SELECT'} * 100) / $total), "%\n";
print $fh "INSERT: ", &comma_numbers($overall_stat{'INSERT'}) || 0, " ",
sprintf("%0.2f", ($overall_stat{'INSERT'} * 100) / $total), "%\n";
print $fh "UPDATE: ", &comma_numbers($overall_stat{'UPDATE'}) || 0, " ",
sprintf("%0.2f", ($overall_stat{'UPDATE'} * 100) / $total), "%\n";
print $fh "DELETE: ", &comma_numbers($overall_stat{'DELETE'}) || 0, " ",
sprintf("%0.2f", ($overall_stat{'DELETE'} * 100) / $total), "%\n";
print $fh "OTHERS: ", &comma_numbers($total - $totala) || 0, " ", sprintf("%0.2f", (($total - $totala) * 100) / $total), "%\n"
if (($total - $totala) > 0);
print $fh "\n";
# Show requests per database statistics
if (scalar keys %database_info > 1) {
print $fh "\n- Request per database ------------------------------------------------------\n\n";
print $fh "Database Request type Count\n";
foreach my $d (sort keys %database_info) {
print $fh "$d - ", &comma_numbers($database_info{$d}{count}), "\n";
foreach my $r (sort keys %{$database_info{$d}}) {
next if ($r eq 'count');
print $fh "\t$r ", &comma_numbers($database_info{$d}{$r}), "\n";
}
}
}
# Show requests per application statistics
if (scalar keys %application_info > 1) {
print $fh "\n- Request per application ------------------------------------------------------\n\n";
print $fh "Application Request type Count\n";
foreach my $d (sort keys %application_info) {
print $fh "$d - ", &comma_numbers($application_info{$d}{count}), "\n";
foreach my $r (sort keys %{$application_info{$d}}) {
next if ($r eq 'count');
print $fh "\t$r ", &comma_numbers($application_info{$d}{$r}), "\n";
}
}
}
}
if (!$disable_lock && scalar keys %lock_info > 0) {
print $fh "\n- Locks by type ------------------------------------------------------\n\n";
print $fh "Type Object Count Total Duration Avg duration (s)\n";
my $total_count = 0;
my $total_duration = 0;
foreach my $t (sort keys %lock_info) {
print $fh "$t\t\t", &comma_numbers($lock_info{$t}{count}), " ", &convert_time($lock_info{$t}{duration}), " ",
&convert_time($lock_info{$t}{duration} / $lock_info{$t}{count}), "\n";
foreach my $o (sort keys %{$lock_info{$t}}) {
next if (($o eq 'count') || ($o eq 'duration') || ($o eq 'chronos'));
print $fh "\t$o\t", &comma_numbers($lock_info{$t}{$o}{count}), " ", &convert_time($lock_info{$t}{$o}{duration}), " ",
&convert_time($lock_info{$t}{$o}{duration} / $lock_info{$t}{$o}{count}), "\n";
}
$total_count += $lock_info{$t}{count};
$total_duration += $lock_info{$t}{duration};
}
print $fh "Total:\t\t\t", &comma_numbers($total_count), " ", &convert_time($total_duration), " ",
&convert_time($total_duration / ($total_count || 1)), "\n";
}
# Show session per database statistics
if (!$disable_session && exists $session_info{database}) {
print $fh "\n- Sessions per database ------------------------------------------------------\n\n";
print $fh "Database Count Total Duration Avg duration (s)\n";
foreach my $d (sort keys %{$session_info{database}}) {
print $fh "$d - ", &comma_numbers($session_info{database}{$d}{count}), " ",
&convert_time($session_info{database}{$d}{duration}), " ",
&convert_time($session_info{database}{$d}{duration} / $session_info{database}{$d}{count}), "\n";
}
}
# Show session per user statistics
if (!$disable_session && exists $session_info{user}) {
print $fh "\n- Sessions per user ------------------------------------------------------\n\n";
print $fh "User Count Total Duration Avg duration (s)\n";
foreach my $d (sort keys %{$session_info{user}}) {
print $fh "$d - ", &comma_numbers($session_info{user}{$d}{count}), " ", &convert_time($session_info{user}{$d}{duration}),
" ", &convert_time($session_info{user}{$d}{duration} / $session_info{user}{$d}{count}), "\n";
}
}
# Show session per host statistics
if (!$disable_session && exists $session_info{host}) {
print $fh "\n- Sessions per host ------------------------------------------------------\n\n";
print $fh "User Count Total Duration Avg duration (s)\n";
foreach my $d (sort keys %{$session_info{host}}) {
print $fh "$d - ", &comma_numbers($session_info{host}{$d}{count}), " ", &convert_time($session_info{host}{$d}{duration}),
" ", &convert_time($session_info{host}{$d}{duration} / $session_info{host}{$d}{count}), "\n";
}
}
# Show connection per database statistics
if (!$disable_connection && exists $connection_info{database}) {
print $fh "\n- Connections per database ------------------------------------------------------\n\n";
print $fh "Database User Count\n";
foreach my $d (sort keys %{$connection_info{database}}) {
print $fh "$d - ", &comma_numbers($connection_info{database}{$d}), "\n";
foreach my $u (sort keys %{$connection_info{user}}) {
next if (!exists $connection_info{database_user}{$d}{$u});
print $fh "\t$u ", &comma_numbers($connection_info{database_user}{$d}{$u}), "\n";
}
}
}
# Show connection per user statistics
if (!$disable_connection && exists $connection_info{user}) {
print $fh "\n- Connections per user ------------------------------------------------------\n\n";
print $fh "User Count\n";
foreach my $d (sort keys %{$connection_info{user}}) {
print $fh "$d - ", &comma_numbers($connection_info{user}{$d}), "\n";
}
}
# Show connection per host statistics
if (!$disable_connection && exists $connection_info{host}) {
print $fh "\n- Connections per host ------------------------------------------------------\n\n";
print $fh "User Count\n";
foreach my $d (sort keys %{$connection_info{host}}) {
print $fh "$d - ", &comma_numbers($connection_info{host}{$d}), "\n";
}
}
# Show lock wait detailed informations
if (!$disable_lock && scalar keys %lock_info > 0) {
my @top_locked_queries;
foreach my $h (keys %normalyzed_info) {
if (exists($normalyzed_info{$h}{locks})) {
push (@top_locked_queries, [$h, $normalyzed_info{$h}{locks}{count}, $normalyzed_info{$h}{locks}{wait},
$normalyzed_info{$h}{locks}{minwait}, $normalyzed_info{$h}{locks}{maxwait}]);
}
}
# Most frequent waiting queries (N)
@top_locked_queries = sort {$b->[2] <=> $a->[2]} @top_locked_queries;
print $fh "\n- Most frequent waiting queries (N) -----------------------------------------\n\n";
print $fh "Rank Count Total wait time (s) Min/Max/Avg duration (s) Query\n";
for (my $i = 0 ; $i <= $#top_locked_queries ; $i++) {
last if ($i > $end_top);
print $fh ($i + 1), ") ", $top_locked_queries[$i]->[1], " - ", &convert_time($top_locked_queries[$i]->[2]),
" - ", &convert_time($top_locked_queries[$i]->[3]), "/", &convert_time($top_locked_queries[$i]->[4]), "/",
&convert_time(($top_locked_queries[$i]->[4] / $top_locked_queries[$i]->[1])),
" - ", $top_locked_queries[$i]->[0], "\n";
print $fh "--\n";
my $k = $top_locked_queries[$i]->[0];
my $j = 1;
foreach my $d (sort {$b <=> $a} keys %{$normalyzed_info{$k}{samples}}) {
my $ttl = $top_locked_info[$i]->[1] || '';
my $db = " - $normalyzed_info{$k}{samples}{$d}{date} - database: $normalyzed_info{$k}{samples}{$d}{db}" if ($normalyzed_info{$k}{samples}{$d}{db});
$db .= ", user: $normalyzed_info{$k}{samples}{$d}{user}" if ($normalyzed_info{$k}{samples}{$d}{user});
$db .= ", remote: $normalyzed_info{$k}{samples}{$d}{remote}" if ($normalyzed_info{$k}{samples}{$d}{remote});
$db .= ", app: $normalyzed_info{$k}{samples}{$d}{app}" if ($normalyzed_info{$k}{samples}{$d}{app});
$db =~ s/^, / - /;
print $fh "\t- Example $j: ", &convert_time($d), "$db - ", $normalyzed_info{$k}{samples}{$d}{query}, "\n";
$j++;
}
}
print $fh "\n";
@top_locked_queries = ();
# Queries that waited the most
@top_locked_info = sort {$b->[0] <=> $a->[0]} @top_locked_info;
print $fh "\n- Queries that waited the mosts ---------------------------------------------\n\n";
print $fh "Rank Wait time (s) Query\n";
for (my $i = 0 ; $i <= $#top_locked_info ; $i++) {
my $ttl = $top_locked_info[$i]->[1] || '';
my $db = " - database: $top_locked_info[$i]->[3]" if ($top_locked_info[$i]->[3]);
$db .= ", user: $top_locked_info[$i]->[4]" if ($top_locked_info[$i]->[4]);
$db .= ", remote: $top_locked_info[$i]->[5]" if ($top_locked_info[$i]->[5]);
$db .= ", app: $top_locked_info[$i]->[6]" if ($top_locked_info[$i]->[6]);
$db =~ s/^, / - /;
print $fh ($i + 1), ") ", &convert_time($top_locked_info[$i]->[0]),
" $ttl$db - ", $top_locked_info[$i]->[2], "\n";
print $fh "--\n";
}
print $fh "\n";
}
# Show temporary files detailed informations
if (!$disable_temporary && scalar keys %tempfile_info > 0) {
my @top_temporary;
foreach my $h (keys %normalyzed_info) {
if (exists($normalyzed_info{$h}{tempfiles})) {
push (@top_temporary, [$h, $normalyzed_info{$h}{tempfiles}{count}, $normalyzed_info{$h}{tempfiles}{size},
$normalyzed_info{$h}{tempfiles}{minsize}, $normalyzed_info{$h}{tempfiles}{maxsize}]);
}
}
# Queries generating the most temporary files (N)
@top_temporary = sort {$b->[1] <=> $a->[1]} @top_temporary;
print $fh "\n- Queries generating the most temporary files (N) ---------------------------\n\n";
print $fh "Rank Count Total size Min/Max/Avg size Query\n";
my $idx = 1;
for (my $i = 0 ; $i <= $#top_temporary ; $i++) {
last if ($i > $end_top);
print $fh $idx, ") ",
$top_temporary[$i]->[1], " - ", &comma_numbers($top_temporary[$i]->[2]),
" - ", &comma_numbers($top_temporary[$i]->[3]),
"/", &comma_numbers($top_temporary[$i]->[4]), "/",
&comma_numbers(sprintf("%.2f", $top_temporary[$i]->[2] / $top_temporary[$i]->[1])),
" - ", $top_temporary[$i]->[0], "\n";
print $fh "--\n";
my $k = $top_temporary[$i]->[0];
if ($normalyzed_info{$k}{count} > 1) {
my $j = 1;
foreach my $d (sort {$b <=> $a} keys %{$normalyzed_info{$k}{samples}}) {
my $db = "$normalyzed_info{$k}{samples}{$d}{date} - database: $normalyzed_info{$k}{samples}{$d}{db}" if ($normalyzed_info{$k}{samples}{$d}{db});
$db .= ", user: $normalyzed_info{$k}{samples}{$d}{user}" if ($normalyzed_info{$k}{samples}{$d}{user});
$db .= ", remote: $normalyzed_info{$k}{samples}{$d}{remote}" if ($normalyzed_info{$k}{samples}{$d}{remote});
$db .= ", app: $normalyzed_info{$k}{samples}{$d}{app}" if ($normalyzed_info{$k}{samples}{$d}{app});
$db =~ s/^, / - /;
print $fh "\t- Example $j: ", &convert_time($d), " - $db - ", $normalyzed_info{$k}{samples}{$d}{query}, "\n";
$j++;
}
}
$idx++;
}
@top_temporary = ();
# Top queries generating the largest temporary files
@top_tempfile_info = sort {$b->[0] <=> $a->[0]} @top_tempfile_info;
print $fh "\n- Queries generating the largest temporary files ----------------------------\n\n";
print $fh "Rank Size Query\n";
for (my $i = 0 ; $i <= $#top_tempfile_info ; $i++) {
my $ttl = $top_tempfile_info[$i]->[1] || '';
my $db = " - database: $top_tempfile_info[$i]->[3]" if ($top_tempfile_info[$i]->[3]);
$db .= ", user: $top_tempfile_info[$i]->[4]" if ($top_tempfile_info[$i]->[4]);
$db .= ", remote: $top_tempfile_info[$i]->[5]" if ($top_tempfile_info[$i]->[5]);
$db .= ", app: $top_tempfile_info[$i]->[6]" if ($top_tempfile_info[$i]->[6]);
$db =~ s/^, / - /;
print $fh ($i + 1), ") ", &comma_numbers($top_tempfile_info[$i]->[0]),
" - $ttl$db - ", $top_tempfile_info[$i]->[2], "\n";
}
print $fh "\n";
}
# Show top information
if (!$disable_query && ($#top_slowest >= 0)) {
print $fh "\n- Slowest queries ------------------------------------------------------\n\n";
print $fh "Rank Duration (s) Query\n";
for (my $i = 0 ; $i <= $#top_slowest ; $i++) {
my $db = " database: $top_slowest[$i]->[3]" if ($top_slowest[$i]->[3]);
$db .= ", user: $top_slowest[$i]->[4]" if ($top_slowest[$i]->[4]);
$db .= ", remote: $top_slowest[$i]->[5]" if ($top_slowest[$i]->[5]);
$db .= ", app: $top_slowest[$i]->[6]" if ($top_slowest[$i]->[6]);
$db =~ s/^, //;
print $fh $i + 1, ") " . &convert_time($top_slowest[$i]->[0]) . "$db - $top_slowest[$i]->[2]\n";
print $fh "--\n";
}
print $fh "\n- Queries that took up the most time (N) -------------------------------\n\n";
print $fh "Rank Total duration Times executed Min/Max/Avg duration (s) Query\n";
my $idx = 1;
foreach my $k (sort {$normalyzed_info{$b}{duration} <=> $normalyzed_info{$a}{duration}} keys %normalyzed_info) {
next if (!$normalyzed_info{$k}{count});
last if ($idx > $top);
my $q = $k;
if ($normalyzed_info{$k}{count} == 1) {
foreach (keys %{$normalyzed_info{$k}{samples}}) {
$q = $normalyzed_info{$k}{samples}{$_}{query};
last;
}
}
$normalyzed_info{$k}{average} = $normalyzed_info{$k}{duration} / $normalyzed_info{$k}{count};
print $fh "$idx) "
. &convert_time($normalyzed_info{$k}{duration}) . " - "
. &comma_numbers($normalyzed_info{$k}{count}) . " - "
. &convert_time($normalyzed_info{$k}{min}) . "/"
. &convert_time($normalyzed_info{$k}{max}) . "/"
. &convert_time($normalyzed_info{$k}{average})
. " - $q\n";
print $fh "--\n";
my $i = 1;
foreach my $d (sort {$b <=> $a} keys %{$normalyzed_info{$k}{samples}}) {
my $db = " - database: $normalyzed_info{$k}{samples}{$d}{db}" if ($normalyzed_info{$k}{samples}{$d}{db});
$db .= ", user: $normalyzed_info{$k}{samples}{$d}{user}" if ($normalyzed_info{$k}{samples}{$d}{user});
$db .= ", remote: $normalyzed_info{$k}{samples}{$d}{remote}" if ($normalyzed_info{$k}{samples}{$d}{remote});
$db .= ", app: $normalyzed_info{$k}{samples}{$d}{app}" if ($normalyzed_info{$k}{samples}{$d}{app});
$db =~ s/^, / - /;
print $fh "\t- Example $i: ", &convert_time($d), "$db - ", $normalyzed_info{$k}{samples}{$d}{query}, "\n";
$i++;
}
$idx++;
}
}
if (!$disable_query && (scalar keys %normalyzed_info > 0)) {
print $fh "\n- Most frequent queries (N) --------------------------------------------\n\n";
print $fh "Rank Times executed Total duration Min/Max/Avg duration (s) Query\n";
my $idx = 1;
foreach my $k (sort {$normalyzed_info{$b}{count} <=> $normalyzed_info{$a}{count}} keys %normalyzed_info) {
next if (!$normalyzed_info{$k}{count});
last if ($idx > $top);
my $q = $k;
if ($normalyzed_info{$k}{count} == 1) {
foreach (keys %{$normalyzed_info{$k}{samples}}) {
$q = $normalyzed_info{$k}{samples}{$_}{query};
last;
}
}
print $fh "$idx) "
. &comma_numbers($normalyzed_info{$k}{count}) . " - "
. &convert_time($normalyzed_info{$k}{duration}) . " - "
. &convert_time($normalyzed_info{$k}{min}) . "/"
. &convert_time($normalyzed_info{$k}{max}) . "/"
. &convert_time($normalyzed_info{$k}{duration} / $normalyzed_info{$k}{count})
. " - $q\n";
print $fh "--\n";
my $i = 1;
foreach my $d (sort {$b <=> $a} keys %{$normalyzed_info{$k}{samples}}) {
my $db = " - database: $normalyzed_info{$k}{samples}{$d}{db}" if ($normalyzed_info{$k}{samples}{$d}{db});
$db .= ", user: $normalyzed_info{$k}{samples}{$d}{user}" if ($normalyzed_info{$k}{samples}{$d}{user});
$db .= ", remote: $normalyzed_info{$k}{samples}{$d}{remote}" if ($normalyzed_info{$k}{samples}{$d}{remote});
$db .= ", app: $normalyzed_info{$k}{samples}{$d}{app}" if ($normalyzed_info{$k}{samples}{$d}{app});
$db =~ s/^, / - /;
print $fh "\tExample $i: ", &convert_time($d), "$db - ", $normalyzed_info{$k}{samples}{$d}{query}, "\n";
$i++;
}
$idx++;
}
}
if (!$disable_query && ($#top_slowest >= 0)) {
print $fh "\n- Slowest queries (N) --------------------------------------------------\n\n";
print $fh "Rank Min/Max/Avg duration (s) Times executed Total duration Query\n";
my $idx = 1;
foreach my $k (sort {$normalyzed_info{$b}{average} <=> $normalyzed_info{$a}{average}} keys %normalyzed_info) {
next if (!$normalyzed_info{$k}{count});
last if ($idx > $top);
my $q = $k;
if ($normalyzed_info{$k}{count} == 1) {
foreach (keys %{$normalyzed_info{$k}{samples}}) {
$q = $normalyzed_info{$k}{samples}{$_}{query};
last;
}
}
print $fh "$idx) "
. &convert_time($normalyzed_info{$k}{min}) . "/"
. &convert_time($normalyzed_info{$k}{max}) . "/"
. &convert_time($normalyzed_info{$k}{average}) . " - "
. &comma_numbers($normalyzed_info{$k}{count}) . " - "
. &convert_time($normalyzed_info{$k}{duration})
. " - $q\n";
print $fh "--\n";
my $i = 1;
foreach my $d (sort {$b <=> $a} keys %{$normalyzed_info{$k}{samples}}) {
my $db = " - database: $normalyzed_info{$k}{samples}{$d}{db}" if ($normalyzed_info{$k}{samples}{$d}{db});
$db .= ", user: $normalyzed_info{$k}{samples}{$d}{user}" if ($normalyzed_info{$k}{samples}{$d}{user});
$db .= ", remote: $normalyzed_info{$k}{samples}{$d}{remote}" if ($normalyzed_info{$k}{samples}{$d}{remote});
$db .= ", app: $normalyzed_info{$k}{samples}{$d}{app}" if ($normalyzed_info{$k}{samples}{$d}{app});
$db =~ s/^, / - /;
print $fh "\tExample $i: ", &convert_time($d), "$db - ", $normalyzed_info{$k}{samples}{$d}{query}, "\n";
$i++;
}
$idx++;
}
}
@top_slowest = ();
if (!$disable_error) {
&show_error_as_text();
}
print $fh "\n\n";
print $fh "Report generated by pgBadger $VERSION ($project_url).\n";
}
sub dump_error_as_text
{
# Global information
my $curdate = localtime(time);
my $fmt_nlines = &comma_numbers($nlines);
my $total_time = timestr($td);
$total_time =~ s/^([\.0-9]+) wallclock.*/$1/;
$total_time = &convert_time($total_time * 1000);
my $logfile_str = $log_files[0];
if ($#log_files > 0) {
$logfile_str .= ', ..., ' . $log_files[-1];
}
print $fh qq{
$report_title
- Global information ---------------------------------------------------
Generated on $curdate
Log file: $logfile_str
Parsed $fmt_nlines log entries in $total_time
Log start from $overall_stat{'first_log_ts'} to $overall_stat{'last_log_ts'}
};
&show_error_as_text();
print $fh "\n\n";
print $fh "Report generated by pgBadger $VERSION ($project_url).\n";
}
sub show_error_as_text
{
return if (scalar keys %error_info == 0);
print $fh "\n- Most frequent events (N) ---------------------------------------------\n\n";
my $idx = 1;
foreach my $k (sort {$error_info{$b}{count} <=> $error_info{$a}{count}} keys %error_info) {
next if (!$error_info{$k}{count});
last if ($idx > $top);
if ($error_info{$k}{count} > 1) {
my $msg = $k;
$msg =~ s/HINT: (parameter "[^"]+" changed to)/LOG: $1/;
$msg =~ s/HINT: (database system was shut down)/LOG: $1/;
print $fh "$idx) " . &comma_numbers($error_info{$k}{count}) . " - $msg\n";
print $fh "--\n";
my $j = 1;
for (my $i = 0 ; $i <= $#{$error_info{$k}{date}} ; $i++) {
if ( ($error_info{$k}{error}[$i] =~ s/HINT: (parameter "[^"]+" changed to)/LOG: $1/)
|| ($error_info{$k}{error}[$i] =~ s/HINT: (database system was shut down)/LOG: $1/))
{
$logs_type{HINT}--;
$logs_type{LOG}++;
}
print $fh "\t- Example $j: $error_info{$k}{date}[$i] - $error_info{$k}{error}[$i]\n";
print $fh "\t\tDetail: $error_info{$k}{detail}[$i]\n" if ($error_info{$k}{detail}[$i]);
print $fh "\t\tContext: $error_info{$k}{context}[$i]\n" if ($error_info{$k}{context}[$i]);
print $fh "\t\tHint: $error_info{$k}{hint}[$i]\n" if ($error_info{$k}{hint}[$i]);
print $fh "\t\tStatement: $error_info{$k}{statement}[$i]\n" if ($error_info{$k}{statement}[$i]);
print $fh "\t\tDatabase: $error_info{$k}{db}[$i]\n" if ($error_info{$k}{db}[$i]);
$j++;
}
} else {
if ( ($error_info{$k}{error}[0] =~ s/HINT: (parameter "[^"]+" changed to)/LOG: $1/)
|| ($error_info{$k}{error}[0] =~ s/HINT: (database system was shut down)/LOG: $1/))
{
$logs_type{HINT}--;
$logs_type{LOG}++;
}
print $fh "$idx) " . &comma_numbers($error_info{$k}{count}) . " - $error_info{$k}{error}[0]\n";
print $fh "--\n";
print $fh "\t- Date: $error_info{$k}{date}[0]\n";
print $fh "\t\tDetail: $error_info{$k}{detail}[0]\n" if ($error_info{$k}{detail}[0]);
print $fh "\t\tContext: $error_info{$k}{context}[0]\n" if ($error_info{$k}{context}[0]);
print $fh "\t\tHint: $error_info{$k}{hint}[0]\n" if ($error_info{$k}{hint}[0]);
print $fh "\t\tStatement: $error_info{$k}{statement}[0]\n" if ($error_info{$k}{statement}[0]);
print $fh "\t\tDatabase: $error_info{$k}{db}[0]\n" if ($error_info{$k}{db}[0]);
}
$idx++;
}
if (scalar keys %logs_type > 0) {
print $fh "\n- Logs per type ---------------------------------------------\n\n";
my $total_logs = 0;
foreach my $d (keys %logs_type) {
$total_logs += $logs_type{$d};
}
print $fh "Logs type Count Percentage\n";
foreach my $d (sort keys %logs_type) {
next if (!$logs_type{$d});
print $fh "$d\t\t", &comma_numbers($logs_type{$d}), "\t", sprintf("%0.2f", ($logs_type{$d} * 100) / $total_logs), "%\n";
}
}
}
sub get_page_style
{
return qq{
};
}
sub html_header
{
my $date = localtime(time);
my $style = &get_page_style();
print $fh qq{
$report_title
$style
};
if (!$nograph) {
my @jscode = ;
print $fh <
};
my $idx = 1;
foreach my $k (sort {$error_info{$b}{count} <=> $error_info{$a}{count}} keys %error_info) {
next if (!$error_info{$k}{count});
last if ($idx > $top);
my $col = $idx % 2;
print $fh
"
\n";
}
}
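# Merge statistics read from a binary dump (see dump_as_binary) into the global
# counters; this is typically how the results of parallel jobs (-j/-J) are
# combined into a single report.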
sub load_stats
{
my $fd = shift;
my %stats = %{ fd_retrieve($fd) };
my %_overall_stat = %{$stats{overall_stat}};
my %_normalyzed_info = %{$stats{normalyzed_info}};
my %_error_info = %{$stats{error_info}};
my %_connection_info = %{$stats{connection_info}};
my %_database_info = %{$stats{database_info}};
my %_application_info = %{$stats{application_info}};
my %_checkpoint_info = %{$stats{checkpoint_info}};
my %_restartpoint_info = %{$stats{restartpoint_info}};
my %_session_info = %{$stats{session_info}};
my %_tempfile_info = %{$stats{tempfile_info}};
my %_logs_type = %{$stats{logs_type}};
my %_lock_info = %{$stats{lock_info}};
my %_per_hour_info = %{$stats{per_hour_info}};
my %_per_minute_info = %{$stats{per_minute_info}};
my @_top_slowest = @{$stats{top_slowest}};
my $_nlines = $stats{nlines};
my $_first_log_timestamp = $stats{first_log_timestamp};
my $_last_log_timestamp = $stats{last_log_timestamp};
my @_log_files = @{$stats{log_files}};
my %_autovacuum_info = %{$stats{autovacuum_info}};
my %_autoanalyze_info = %{$stats{autoanalyze_info}};
return if (!$_overall_stat{queries_number} && !$_overall_stat{'errors_number'});
### overall_stat ###
$overall_stat{queries_number} += $_overall_stat{queries_number};
$overall_stat{'first_log_ts'} = $_overall_stat{'first_log_ts'}
if not $overall_stat{'first_log_ts'}
or $overall_stat{'first_log_ts'} gt $_overall_stat{'first_log_ts'};
$overall_stat{'last_log_ts'} = $_overall_stat{'last_log_ts'}
if not $overall_stat{'last_log_ts'}
or $overall_stat{'last_log_ts'} lt $_overall_stat{'last_log_ts'};
$overall_stat{first_query_ts} = $_overall_stat{first_query_ts}
if not $overall_stat{first_query_ts}
or $overall_stat{first_query_ts} gt $_overall_stat{first_query_ts};
$overall_stat{last_query_ts} = $_overall_stat{last_query_ts}
if not $overall_stat{last_query_ts}
or $overall_stat{last_query_ts} lt $_overall_stat{last_query_ts};
$overall_stat{errors_number} += $_overall_stat{errors_number};
$overall_stat{queries_duration} += $_overall_stat{queries_duration};
$overall_stat{DELETE} += $_overall_stat{DELETE}
if exists $_overall_stat{DELETE};
$overall_stat{UPDATE} += $_overall_stat{UPDATE}
if exists $_overall_stat{UPDATE};
$overall_stat{INSERT} += $_overall_stat{INSERT}
if exists $_overall_stat{INSERT};
$overall_stat{SELECT} += $_overall_stat{SELECT}
if exists $_overall_stat{SELECT};
foreach my $k (keys %{$_overall_stat{query_peak}}) {
$overall_stat{query_peak}{$k} += $_overall_stat{query_peak}{$k};
}
# FIXME == $error_info ??
foreach my $k (keys %{$_overall_stat{unique_normalized_errors}}) {
$overall_stat{unique_normalized_errors}{$k} += $_overall_stat{unique_normalized_errors}{$k};
}
$logs_type{ERROR} += $_logs_type{ERROR} if exists $_logs_type{ERROR};
$logs_type{LOG} += $_logs_type{LOG} if exists $_logs_type{LOG};
$logs_type{DETAIL} += $_logs_type{DETAIL} if exists $_logs_type{DETAIL};
$logs_type{STATEMENT} += $_logs_type{STATEMENT} if exists $_logs_type{STATEMENT};
### database_info ###
foreach my $db (keys %_database_info) {
foreach my $k (keys %{ $_database_info{$db} }) {
$database_info{$db}{$k} += $_database_info{$db}{$k};
}
}
### application_info ###
foreach my $app (keys %_application_info) {
foreach my $k (keys %{ $_application_info{$app} }) {
$application_info{$app}{$k} += $_application_info{$app}{$k};
}
}
### connection_info ###
foreach my $db (keys %{ $_connection_info{database} }) {
$connection_info{database}{$db} += $_connection_info{database}{$db};
}
foreach my $db (keys %{ $_connection_info{database_user} }) {
foreach my $user (keys %{ $_connection_info{database_user}{$db} }) {
$connection_info{database_user}{$db}{$user} += $_connection_info{database_user}{$db}{$user};
}
}
foreach my $user (keys %{ $_connection_info{user} }) {
$connection_info{user}{$user} += $_connection_info{user}{$user};
}
foreach my $host (keys %{ $_connection_info{host} }) {
$connection_info{host}{$host} += $_connection_info{host}{$host};
}
$connection_info{count} += $_connection_info{count};
foreach my $day (keys %{ $_connection_info{chronos} }) {
foreach my $hour (keys %{ $_connection_info{chronos}{$day} }) {
foreach my $db (keys %{ $_connection_info{chronos}{$day}{$hour}{database} }) {
$connection_info{chronos}{$day}{$hour}{database}{$db} += $_connection_info{chronos}{$day}{$hour}{database}{$db};
}
foreach my $db (keys %{ $_connection_info{chronos}{$day}{$hour}{database_user} }) {
foreach my $user (keys %{ $_connection_info{chronos}{$day}{$hour}{database_user}{$db} }) {
$connection_info{chronos}{$day}{$hour}{database_user}{$db}{$user} +=
$_connection_info{chronos}{$day}{$hour}{database_user}{$db}{$user};
}
}
$connection_info{chronos}{$day}{$hour}{count} += $_connection_info{chronos}{$day}{$hour}{count};
foreach my $user (keys %{ $_connection_info{chronos}{$day}{$hour}{user} }) {
$connection_info{chronos}{$day}{$hour}{user}{$user} +=
$_connection_info{chronos}{$day}{$hour}{user}{$user};
}
foreach my $host (keys %{ $_connection_info{chronos}{$day}{$hour}{host} }) {
$connection_info{chronos}{$day}{$hour}{host}{$host} +=
$_connection_info{chronos}{$day}{$hour}{host}{$host};
}
}
}
### log_files ###
foreach my $f (@_log_files) {
push(@log_files, $f) if (!grep(m#^$f$#, @log_files));
}
### per_hour_info ###
foreach my $day (keys %_per_hour_info) {
foreach my $hour (keys %{ $_per_hour_info{$day} }) {
$per_hour_info{$day}{$hour}{count} += $_per_hour_info{$day}{$hour}{count};
$per_hour_info{$day}{$hour}{duration} += $_per_hour_info{$day}{$hour}{duration};
# Set min / max duration for this hour
if (!exists $per_hour_info{$day}{$hour}{min} || ($per_hour_info{$day}{$hour}{min} > $_per_hour_info{$day}{$hour}{min})) {
$per_hour_info{$day}{$hour}{min} = $_per_hour_info{$day}{$hour}{min};
}
if (!exists $per_hour_info{$day}{$hour}{max} || ($per_hour_info{$day}{$hour}{max} < $_per_hour_info{$day}{$hour}{max})) {
$per_hour_info{$day}{$hour}{max} = $_per_hour_info{$day}{$hour}{max};
}
if (exists $_per_hour_info{$day}{$hour}{DELETE}) {
$per_hour_info{$day}{$hour}{DELETE}{count} += $_per_hour_info{$day}{$hour}{DELETE}{count};
$per_hour_info{$day}{$hour}{DELETE}{duration} += $_per_hour_info{$day}{$hour}{DELETE}{duration};
}
if (exists $_per_hour_info{$day}{$hour}{SELECT}) {
$per_hour_info{$day}{$hour}{SELECT}{count} += $_per_hour_info{$day}{$hour}{SELECT}{count};
$per_hour_info{$day}{$hour}{SELECT}{duration} += $_per_hour_info{$day}{$hour}{SELECT}{duration};
}
if (exists $_per_hour_info{$day}{$hour}{INSERT}) {
$per_hour_info{$day}{$hour}{INSERT}{count} += $_per_hour_info{$day}{$hour}{INSERT}{count};
$per_hour_info{$day}{$hour}{INSERT}{duration} += $_per_hour_info{$day}{$hour}{INSERT}{duration};
}
if (exists $_per_hour_info{$day}{$hour}{UPDATE}) {
$per_hour_info{$day}{$hour}{UPDATE}{count} += $_per_hour_info{$day}{$hour}{UPDATE}{count};
$per_hour_info{$day}{$hour}{UPDATE}{duration} += $_per_hour_info{$day}{$hour}{UPDATE}{duration};
}
}
}
### error_info ###
foreach my $q (keys %_error_info) {
$error_info{$q}{count} += $_error_info{$q}{count};
# Keep only the wanted sample number
if (!exists $error_info{$q}{date} || ($#{$error_info{$q}{date}} < $sample)) {
push(@{$error_info{$q}{date}}, @{$_error_info{$q}{date}});
push(@{$error_info{$q}{detail}}, @{$_error_info{$q}{detail}});
push(@{$error_info{$q}{context}}, @{$_error_info{$q}{context}});
push(@{$error_info{$q}{statement}}, @{$_error_info{$q}{statement}});
push(@{$error_info{$q}{hint}}, @{$_error_info{$q}{hint}});
push(@{$error_info{$q}{error}}, @{$_error_info{$q}{error}});
push(@{$error_info{$q}{db}}, @{$_error_info{$q}{db}});
foreach my $day (keys %{ $_error_info{$q}{chronos} }) {
foreach my $hour (keys %{$_error_info{$q}{chronos}{$day}}) {
$error_info{$q}{chronos}{$day}{$hour}{count} += $_error_info{$q}{chronos}{$day}{$hour}{count};
}
}
}
}
### per_minute_info ###
foreach my $day (keys %{ $_per_minute_info{connection} }) {
foreach my $hour (keys %{ $_per_minute_info{connection}{$day} }) {
foreach my $min (keys %{ $_per_minute_info{connection}{$day}{$hour} }) {
$per_minute_info{connection}{$day}{$hour}{$min}{count} +=
$_per_minute_info{connection}{$day}{$hour}{$min}{count};
foreach my $sec (keys %{ $_per_minute_info{connection}{$day}{$hour}{$min}{second} }) {
$per_minute_info{connection}{$day}{$hour}{$min}{second}{$sec} +=
$_per_minute_info{connection}{$day}{$hour}{$min}{second}{$sec};
}
}
}
}
foreach my $day (keys %{ $_per_minute_info{query} }) {
foreach my $hour (keys %{ $_per_minute_info{query}{$day} }) {
foreach my $min (keys %{ $_per_minute_info{query}{$day}{$hour} }) {
$per_minute_info{query}{$day}{$hour}{$min}{count} +=
$_per_minute_info{query}{$day}{$hour}{$min}{count};
$per_minute_info{query}{$day}{$hour}{$min}{duration} +=
$_per_minute_info{query}{$day}{$hour}{$min}{duration};
foreach my $sec (keys %{ $_per_minute_info{query}{$day}{$hour}{$min}{second} }) {
$per_minute_info{query}{$day}{$hour}{$min}{second}{$sec} +=
$_per_minute_info{query}{$day}{$hour}{$min}{second}{$sec};
}
}
}
}
### lock_info ###
foreach my $lock (keys %_lock_info) {
$lock_info{$lock}{count} += $_lock_info{$lock}{count};
foreach my $day (keys %{ $_lock_info{$lock}{chronos} }) {
foreach my $hour (keys %{ $_lock_info{$lock}{chronos}{$day} }) {
$lock_info{$lock}{chronos}{$day}{$hour}{count} += $_lock_info{$lock}{chronos}{$day}{$hour}{count};
$lock_info{$lock}{chronos}{$day}{$hour}{duration} += $_lock_info{$lock}{chronos}{$day}{$hour}{duration};
}
}
$lock_info{$lock}{duration} += $_lock_info{$lock}{duration};
foreach my $type (keys %{$_lock_info{$lock}}) {
next if $type =~ /^(count|chronos|duration)$/;
$lock_info{$lock}{$type}{count} += $_lock_info{$lock}{$type}{count};
$lock_info{$lock}{$type}{duration} += $_lock_info{$lock}{$type}{duration};
}
}
### nlines ###
$nlines += $_nlines;
### normalyzed_info ###
foreach my $stmt (keys %_normalyzed_info) {
foreach my $dt (keys %{$_normalyzed_info{$stmt}{samples}} ) {
$normalyzed_info{$stmt}{samples}{$dt} = $_normalyzed_info{$stmt}{samples}{$dt};
}
# Keep only the top N samples
my $i = 1;
foreach my $k (sort {$b <=> $a} keys %{$normalyzed_info{$stmt}{samples}}) {
if ($i > $sample) {
delete $normalyzed_info{$stmt}{samples}{$k};
}
$i++;
}
$normalyzed_info{$stmt}{count} += $_normalyzed_info{$stmt}{count};
# Set min / max duration for this query
if (!exists $normalyzed_info{$stmt}{min} || ($normalyzed_info{$stmt}{min} > $_normalyzed_info{$stmt}{min})) {
$normalyzed_info{$stmt}{min} = $_normalyzed_info{$stmt}{min};
}
if (!exists $normalyzed_info{$stmt}{max} || ($normalyzed_info{$stmt}{max} < $_normalyzed_info{$stmt}{max})) {
$normalyzed_info{$stmt}{max} = $_normalyzed_info{$stmt}{max};
}
foreach my $day (keys %{$_normalyzed_info{$stmt}{chronos}} ) {
foreach my $hour (keys %{$_normalyzed_info{$stmt}{chronos}{$day}} ) {
$normalyzed_info{$stmt}{chronos}{$day}{$hour}{count} +=
$_normalyzed_info{$stmt}{chronos}{$day}{$hour}{count};
$normalyzed_info{$stmt}{chronos}{$day}{$hour}{duration} +=
$_normalyzed_info{$stmt}{chronos}{$day}{$hour}{duration};
}
}
$normalyzed_info{$stmt}{duration} += $_normalyzed_info{$stmt}{duration};
if (exists $_normalyzed_info{$stmt}{locks}) {
$normalyzed_info{$stmt}{locks}{count} += $_normalyzed_info{$stmt}{locks}{count};
$normalyzed_info{$stmt}{locks}{wait} += $_normalyzed_info{$stmt}{locks}{wait};
if (!exists $normalyzed_info{$stmt}{locks}{minwait} || ($normalyzed_info{$stmt}{locks}{minwait} > $_normalyzed_info{$stmt}{locks}{minwait})) {
$normalyzed_info{$stmt}{locks}{minwait} = $_normalyzed_info{$stmt}{locks}{minwait};
}
if (!exists $normalyzed_info{$stmt}{locks}{maxwait} || ($normalyzed_info{$stmt}{locks}{maxwait} < $_normalyzed_info{$stmt}{locks}{maxwait})) {
$normalyzed_info{$stmt}{locks}{maxwait} = $_normalyzed_info{$stmt}{locks}{maxwait};
}
}
if (exists $_normalyzed_info{$stmt}{tempfiles}) {
$normalyzed_info{$stmt}{tempfiles}{count} += $_normalyzed_info{$stmt}{tempfiles}{count};
$normalyzed_info{$stmt}{tempfiles}{size} += $_normalyzed_info{$stmt}{tempfiles}{size};
if (!exists $normalyzed_info{$stmt}{tempfiles}{minsize} || ($normalyzed_info{$stmt}{tempfiles}{minsize} > $_normalyzed_info{$stmt}{tempfiles}{minsize})) {
$normalyzed_info{$stmt}{tempfiles}{minsize} = $_normalyzed_info{$stmt}{tempfiles}{minsize};
}
if (!exists $normalyzed_info{$stmt}{tempfiles}{maxsize} || ($normalyzed_info{$stmt}{tempfiles}{maxsize} < $_normalyzed_info{$stmt}{tempfiles}{maxsize})) {
$normalyzed_info{$stmt}{tempfiles}{maxsize} = $_normalyzed_info{$stmt}{tempfiles}{maxsize};
}
}
}
### session_info ###
foreach my $db (keys %{ $_session_info{database}}) {
$session_info{database}{$db}{count} += $_session_info{database}{$db}{count};
$session_info{database}{$db}{duration} += $_session_info{database}{$db}{duration};
}
$session_info{count} += $_session_info{count};
foreach my $day (keys %{ $_session_info{chronos}}) {
foreach my $hour (keys %{ $_session_info{chronos}{$day}}) {
$session_info{chronos}{$day}{$hour}{count} += $_session_info{chronos}{$day}{$hour}{count};
$session_info{chronos}{$day}{$hour}{duration} += $_session_info{chronos}{$day}{$hour}{duration};
}
}
foreach my $user (keys %{ $_session_info{user}}) {
$session_info{user}{$user}{count} += $_session_info{user}{$user}{count};
$session_info{user}{$user}{duration} += $_session_info{user}{$user}{duration};
}
$session_info{duration} += $_session_info{duration};
foreach my $host (keys %{ $_session_info{host}}) {
$session_info{host}{$host}{count} += $_session_info{host}{$host}{count};
$session_info{host}{$host}{duration} += $_session_info{host}{$host}{duration};
}
### tempfile_info ###
$tempfile_info{count} += $_tempfile_info{count}
if defined $_tempfile_info{count};
$tempfile_info{size} += $_tempfile_info{size}
if defined $_tempfile_info{size};
$tempfile_info{maxsize} = $_tempfile_info{maxsize}
if defined $_tempfile_info{maxsize} and ( not defined $tempfile_info{maxsize}
or $tempfile_info{maxsize} < $_tempfile_info{maxsize} );
foreach my $day (keys %{ $_tempfile_info{chronos} }) {
foreach my $hour (keys %{ $_tempfile_info{chronos}{$day} }) {
$tempfile_info{chronos}{$day}{$hour}{count} +=
$_tempfile_info{chronos}{$day}{$hour}{count}
if defined $_tempfile_info{chronos}{$day}{$hour}{count};
$tempfile_info{chronos}{$day}{$hour}{size} +=
$_tempfile_info{chronos}{$day}{$hour}{size}
if defined $_tempfile_info{chronos}{$day}{$hour}{size};
}
}
### top_slowest ###
my @tmp_top_slowest = sort {$b->[0] <=> $a->[0]} (@top_slowest, @_top_slowest);
@top_slowest = ();
for (my $i = 0; $i <= $#tmp_top_slowest; $i++) {
last if ($i == $end_top);
push(@top_slowest, $tmp_top_slowest[$i]);
}
### checkpoint_info ###
$checkpoint_info{file_removed} += $_checkpoint_info{file_removed};
$checkpoint_info{sync} += $_checkpoint_info{sync};
$checkpoint_info{wbuffer} += $_checkpoint_info{wbuffer};
$checkpoint_info{file_recycled} += $_checkpoint_info{file_recycled};
$checkpoint_info{total} += $_checkpoint_info{total};
$checkpoint_info{file_added} += $_checkpoint_info{file_added};
$checkpoint_info{write} += $_checkpoint_info{write};
foreach my $day (keys %{ $_checkpoint_info{chronos} }) {
foreach my $hour (keys %{ $_checkpoint_info{chronos}{$day} }) {
$checkpoint_info{chronos}{$day}{$hour}{file_removed} += $_checkpoint_info{chronos}{$day}{$hour}{file_removed};
$checkpoint_info{chronos}{$day}{$hour}{sync} += $_checkpoint_info{chronos}{$day}{$hour}{sync};
$checkpoint_info{chronos}{$day}{$hour}{wbuffer} += $_checkpoint_info{chronos}{$day}{$hour}{wbuffer};
$checkpoint_info{chronos}{$day}{$hour}{file_recycled} += $_checkpoint_info{chronos}{$day}{$hour}{file_recycled};
$checkpoint_info{chronos}{$day}{$hour}{total} += $_checkpoint_info{chronos}{$day}{$hour}{total};
$checkpoint_info{chronos}{$day}{$hour}{file_added} += $_checkpoint_info{chronos}{$day}{$hour}{file_added};
$checkpoint_info{chronos}{$day}{$hour}{write} += $_checkpoint_info{chronos}{$day}{$hour}{write};
}
}
### restartpoint_info ###
$restartpoint_info{sync} += $_restartpoint_info{sync};
$restartpoint_info{wbuffer} += $_restartpoint_info{wbuffer};
$restartpoint_info{total} += $_restartpoint_info{total};
$restartpoint_info{write} += $_restartpoint_info{write};
foreach my $day (keys %{ $_restartpoint_info{chronos} }) {
foreach my $hour (keys %{ $_restartpoint_info{chronos}{$day} }) {
$restartpoint_info{chronos}{$day}{$hour}{sync} += $_restartpoint_info{chronos}{$day}{$hour}{sync};
$restartpoint_info{chronos}{$day}{$hour}{wbuffer} += $_restartpoint_info{chronos}{$day}{$hour}{wbuffer};
$restartpoint_info{chronos}{$day}{$hour}{total} += $_restartpoint_info{chronos}{$day}{$hour}{total};
$restartpoint_info{chronos}{$day}{$hour}{write} += $_restartpoint_info{chronos}{$day}{$hour}{write};
}
}
#### Autovacuum infos ####
$autovacuum_info{count} += $_autovacuum_info{count};
foreach my $day (keys %{ $_autovacuum_info{chronos} }) {
foreach my $hour (keys %{ $_autovacuum_info{chronos}{$day} }) {
$autovacuum_info{chronos}{$day}{$hour}{count} += $_autovacuum_info{chronos}{$day}{$hour}{count};
}
}
foreach my $table (keys %{ $_autovacuum_info{tables} }) {
$autovacuum_info{tables}{$table}{vacuums} += $_autovacuum_info{tables}{$table}{vacuums};
$autovacuum_info{tables}{$table}{idxscans} += $_autovacuum_info{tables}{$table}{idxscans};
$autovacuum_info{tables}{$table}{tuples}{removed} += $_autovacuum_info{tables}{$table}{tuples}{removed};
$autovacuum_info{tables}{$table}{pages}{removed} += $_autovacuum_info{tables}{$table}{pages}{removed};
}
#### Autoanalyze infos ####
$autoanalyze_info{count} += $_autoanalyze_info{count};
foreach my $day (keys %{ $_autoanalyze_info{chronos} }) {
foreach my $hour (keys %{ $_autoanalyze_info{chronos}{$day} }) {
$autoanalyze_info{chronos}{$day}{$hour}{count} += $_autoanalyze_info{chronos}{$day}{$hour}{count};
}
}
foreach my $table (keys %{ $_autoanalyze_info{tables} }) {
$autoanalyze_info{tables}{$table}{analyzes} += $_autoanalyze_info{tables}{$table}{analyzes};
}
return;
}
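# Serialize the collected statistics with Storable's store_fd() so that another
# pgBadger process can read them back with fd_retrieve() in load_stats().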
sub dump_as_binary
{
my $lfh = shift();
store_fd({
'overall_stat' => \%overall_stat,
'normalyzed_info' => \%normalyzed_info,
'error_info' => \%error_info,
'connection_info' => \%connection_info,
'database_info' => \%database_info,
'application_info' => \%application_info,
'checkpoint_info' => \%checkpoint_info,
'restartpoint_info' => \%restartpoint_info,
'session_info' => \%session_info,
'tempfile_info' => \%tempfile_info,
'error_info' => \%error_info,
'logs_type' => \%logs_type,
'lock_info' => \%lock_info,
'per_hour_info' => \%per_hour_info,
'per_minute_info' => \%per_minute_info,
'top_slowest' => \@top_slowest,
'nlines' => $nlines,
'log_files' => \@log_files,
'autovacuum_info' => \%autovacuum_info,
'autoanalyze_info' => \%autoanalyze_info
}, $lfh) || die ("Couldn't save binary data to «$outfile»!\n");
}
# Highlight SQL code
sub highlight_code
{
my $code = shift;
# Try to escape HTML code
$code =~ s/<([\/a-zA-Z])\b/\<$1/sg;
# Do not try to prettify queries longer than 10KB,
# this would take too much time
return $code if (length($code) > 10240);
# prettify SQL query
if (!$noprettify) {
$sql_prettified->query($code);
$code = $sql_prettified->beautify;
}
return $code if ($nohighlight);
my $i = 0;
my @qqcode = ();
while ($code =~ s/("[^\"]*")/QQCODEY${i}A/s) {
push(@qqcode, $1);
$i++;
}
$i = 0;
my @qcode = ();
while ($code =~ s/('[^\']*')/QCODEY${i}B/s) {
push(@qcode, $1);
$i++;
}
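# Quoted literals were replaced above by QQCODEY<n>A / QCODEY<n>B placeholders so
# the keyword highlighting below cannot alter string contents; they are restored
# from @qqcode / @qcode once highlighting is done.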
foreach my $x (keys %SYMBOLS) {
$code =~ s/$x/\$\$STYLESY0A\$\$$SYMBOLS{$x}\$\$STYLESY0B\$\$/gs;
}
for (my $x = 0 ; $x <= $#KEYWORDS1 ; $x++) {
$code =~ s/\b$KEYWORDS1[$x]\b/$KEYWORDS1[$x]<\/span>/igs;
$code =~ s/(?$KEYWORDS1[$x]<\/span>/igs;
}
for (my $x = 0 ; $x <= $#KEYWORDS2 ; $x++) {
$code =~ s/(?$KEYWORDS2[$x]<\/span>/igs;
}
for (my $x = 0 ; $x <= $#KEYWORDS3 ; $x++) {
$code =~ s/\b$KEYWORDS3[$x]\b/$KEYWORDS3[$x]<\/span>/igs;
}
for (my $x = 0 ; $x <= $#BRACKETS ; $x++) {
$code =~ s/($BRACKETS[$x])/$1<\/span>/igs;
}
$code =~ s/\$\$STYLESY0A\$\$([^\$]+)\$\$STYLESY0B\$\$/$1<\/span>/gs;
$code =~ s/\b(\d+)\b/$1<\/span>/igs;
for (my $x = 0; $x <= $#qcode; $x++) {
$code =~ s/QCODEY${x}B/$qcode[$x]/s;
}
for (my $x = 0; $x <= $#qqcode; $x++) {
$code =~ s/QQCODEY${x}A/$qqcode[$x]/s;
}
$code =~ s/('[^']*')/$1<\/span>/gs;
$code =~ s/(`[^`]*`)/$1<\/span>/gs;
return $code;
}
sub compute_arg_list
{
# Some command line arguments can be used multiple times or be written
# as a comma-separated list.
# For example: --dbuser=postgres --dbuser=joe or --dbuser=postgres,joe
# So we have to aggregate all the possible values
my @tmp = ();
foreach my $v (@exclude_user) {
push(@tmp, split(/,/, $v));
}
@exclude_user = ();
push(@exclude_user, @tmp);
@tmp = ();
foreach my $v (@dbname) {
push(@tmp, split(/,/, $v));
}
@dbname = ();
push(@dbname, @tmp);
@tmp = ();
foreach my $v (@dbuser) {
push(@tmp, split(/,/, $v));
}
@dbuser = ();
push(@dbuser, @tmp);
@tmp = ();
foreach my $v (@dbclient) {
push(@tmp, split(/,/, $v));
}
@dbclient = ();
push(@dbclient, @tmp);
@tmp = ();
foreach my $v (@dbappname) {
push(@tmp, split(/,/, $v));
}
@dbappname = ();
push(@dbappname, @tmp);
}
sub validate_log_line
{
my ($t_pid) = @_;
# Check user and/or database if required
if ($#dbname >= 0) {
# Log line does not match the required dbname
if (!$prefix_vars{'t_dbname'} || !grep(/^$prefix_vars{'t_dbname'}$/i, @dbname)) {
delete $cur_info{$t_pid};
return 0;
}
}
if ($#dbuser >= 0) {
# Log line does not match the required dbuser
if (!$prefix_vars{'t_dbuser'} || !grep(/^$prefix_vars{'t_dbuser'}$/i, @dbuser)) {
delete $cur_info{$t_pid};
return 0;
}
}
if ($#dbclient >= 0) {
# Log line does not match the required dbclient
$prefix_vars{'t_client'} ||= $prefix_vars{'t_hostport'};
if (!$prefix_vars{'t_client'} || !grep(/^$prefix_vars{'t_client'}$/i, @dbclient)) {
delete $cur_info{$t_pid};
return 0;
}
}
if ($#dbappname >= 0) {
# Log line does not match the required application name
if (!$prefix_vars{'t_appname'} || !grep(/^$prefix_vars{'t_appname'}$/i, @dbappname)) {
delete $cur_info{$t_pid};
return 0;
}
}
if ($#exclude_user >= 0) {
# Log line matches the excluded dbuser
if ($prefix_vars{'t_dbuser'} && grep(/^$prefix_vars{'t_dbuser'}$/i, @exclude_user)) {
delete $cur_info{$t_pid};
return 0;
}
}
return 1;
}
sub parse_log_prefix
{
my ($t_logprefix) = @_;
# Extract user and database information from the logprefix part
if ($t_logprefix) {
# Search for database user
if ($t_logprefix =~ $regex_prefix_dbuser) {
$prefix_vars{'t_dbuser'} = $1;
}
# Search for database name
if ($t_logprefix =~ $regex_prefix_dbname) {
$prefix_vars{'t_dbname'} = $1;
}
}
}
sub parse_query
{
my $t_pid = $prefix_vars{'t_pid'};
# Force parameter change to be a hint message so that it can appear
# in the event/error/warning messages report part.
if ($prefix_vars{'t_loglevel'} eq 'LOG') {
if ($prefix_vars{'t_query'} =~ /parameter "[^"]+" changed to "[^"]+"/) {
$prefix_vars{'t_loglevel'} = 'HINT';
} elsif ($prefix_vars{'t_query'} =~ /database system was shut down at /) {
$prefix_vars{'t_loglevel'} = 'HINT';
}
}
# Do not parse lines that are not error-like messages
if ($error_only && ($prefix_vars{'t_loglevel'} !~ /(WARNING|ERROR|FATAL|PANIC|DETAIL|HINT|STATEMENT|CONTEXT)/)) {
if (exists $cur_info{$t_pid} && (!$prefix_vars{'t_session_line'} || ($prefix_vars{'t_session_line'} != $cur_info{$t_pid}{session}))) {
&store_queries($t_pid);
delete $cur_info{$t_pid};
}
return;
}
# Do not parse lines that are error-like messages
if ($disable_error && ($prefix_vars{'t_loglevel'} =~ /WARNING|ERROR|FATAL|PANIC|HINT|CONTEXT|DETAIL|STATEMENT/)) {
if (exists $cur_info{$t_pid} && (!$prefix_vars{'t_session_line'} || ($prefix_vars{'t_session_line'} != $cur_info{$t_pid}{session}))) {
&store_queries($t_pid);
delete $cur_info{$t_pid};
}
return;
}
# Store a counter of logs type
$logs_type{$prefix_vars{'t_loglevel'}}++;
# Replace the syslog tab escape (#011) with a real tabulation
$prefix_vars{'t_query'} =~ s/#011/\t/g if ($format =~ /syslog/);
my $date_part = "$prefix_vars{'t_year'}$prefix_vars{'t_month'}$prefix_vars{'t_day'}";
# Stores lock activity
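# Illustrative (hypothetical) LOG message matched by the regex below:
#   process 12345 acquired ShareLock on transaction 987654 after 200.123 ms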
if (($prefix_vars{'t_loglevel'} eq 'LOG') && ($prefix_vars{'t_query'} =~ /acquired ([^\s]+) on ([^\s]+) .* after ([0-9\.]+) ms/))
{
return if ($disable_lock);
$lock_info{$1}{count}++;
$lock_info{$1}{duration} += $3;
$lock_info{$1}{$2}{count}++;
$lock_info{$1}{$2}{duration} += $3;
$lock_info{$1}{chronos}{$date_part}{$prefix_vars{'t_hour'}}{count}++;
$lock_info{$1}{chronos}{$date_part}{$prefix_vars{'t_hour'}}{duration} += $3;
# Store the current lock information; it will be used later
# when we parse the query responsible for the lock
$cur_lock_info{$t_pid}{wait} = $3;
return;
}
# Stores query related to last lock information
if (($prefix_vars{'t_loglevel'} eq 'STATEMENT') && exists $cur_lock_info{$t_pid}) {
$cur_lock_info{$t_pid}{query} = $prefix_vars{'t_query'};
$cur_lock_info{$t_pid}{timestamp} = $prefix_vars{'t_timestamp'};
$cur_lock_info{$t_pid}{dbname} = $prefix_vars{'t_dbname'};
$cur_lock_info{$t_pid}{dbuser} = $prefix_vars{'t_dbuser'};
$cur_lock_info{$t_pid}{dbclient} = $prefix_vars{'t_client'};
$cur_lock_info{$t_pid}{dbappname} = $prefix_vars{'t_appname'};
return;
}
# Stores temporary files activity
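# Illustrative (hypothetical) LOG message matched below:
#   temporary file: path "base/pgsql_tmp/pgsql_tmp12345.0", size 10485760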
if (($prefix_vars{'t_loglevel'} eq 'LOG') && ($prefix_vars{'t_query'} =~ /temporary file: path .*, size (\d+)/)) {
return if ($disable_temporary);
$tempfile_info{count}++;
$tempfile_info{size} += $1;
$tempfile_info{chronos}{$date_part}{$prefix_vars{'t_hour'}}{count}++;
$tempfile_info{chronos}{$date_part}{$prefix_vars{'t_hour'}}{size} += $1;
$tempfile_info{maxsize} = $1 if ($tempfile_info{maxsize} < $1);
# Store the current temporary file information; it will be used later
# when we parse the query responsible for the temporary file
$cur_temp_info{$t_pid}{size} = $1;
return;
}
# Stores query related to last created temporary file
if (($prefix_vars{'t_loglevel'} eq 'STATEMENT') && exists $cur_temp_info{$t_pid}) {
$cur_temp_info{$t_pid}{query} = $prefix_vars{'t_query'};
$cur_temp_info{$t_pid}{timestamp} = $prefix_vars{'t_timestamp'};
$cur_temp_info{$t_pid}{dbname} = $prefix_vars{'t_dbname'};
$cur_temp_info{$t_pid}{dbuser} = $prefix_vars{'t_dbuser'};
$cur_temp_info{$t_pid}{dbclient} = $prefix_vars{'t_client'};
$cur_temp_info{$t_pid}{dbappname} = $prefix_vars{'t_appname'};
return;
}
# Stores pre-connection activity
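# Illustrative (hypothetical) LOG message matched below:
#   connection received: host=192.168.1.10 port=52311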
if (($prefix_vars{'t_loglevel'} eq 'LOG') && ($prefix_vars{'t_query'} =~ /connection received: host=([^\s]+) port=(\d+)/)) {
return if ($disable_connection);
$conn_received{$t_pid} = $1;
return;
}
# Stores connection activity
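# Illustrative (hypothetical) LOG message matched below:
#   connection authorized: user=alice database=mydb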
if ( ($prefix_vars{'t_loglevel'} eq 'LOG')
&& ($prefix_vars{'t_query'} =~ /connection authorized: user=([^\s]+) database=([^\s]+)/))
{
return if ($disable_connection);
my $usr = $1;
my $db = $2;
if ($extension eq 'tsung') {
$tsung_session{$prefix_vars{'t_pid'}}{connection}{database} = $db;
$tsung_session{$prefix_vars{'t_pid'}}{connection}{user} = $usr;
$tsung_session{$prefix_vars{'t_pid'}}{connection}{date} = $prefix_vars{'t_date'};
return;
}
$connection_info{count}++;
$connection_info{user}{$usr}++;
$connection_info{database}{$db}++;
$connection_info{database_user}{$db}{$usr}++;
$connection_info{chronos}{$date_part}{$prefix_vars{'t_hour'}}{count}++;
$connection_info{chronos}{$date_part}{$prefix_vars{'t_hour'}}{user}{$usr}++;
$connection_info{chronos}{$date_part}{$prefix_vars{'t_hour'}}{database}{$db}++;
$connection_info{chronos}{$date_part}{$prefix_vars{'t_hour'}}{database_user}{$db}{$usr}++;
if ($graph) {
$per_minute_info{connection}{$date_part}{$prefix_vars{'t_hour'}}{"$prefix_vars{'t_min'}"}{count}++;
$per_minute_info{connection}{$date_part}{$prefix_vars{'t_hour'}}{"$prefix_vars{'t_min'}"}{second}
{$prefix_vars{'t_sec'}}++;
}
if (exists $conn_received{$t_pid}) {
$connection_info{host}{$conn_received{$t_pid}}++;
$connection_info{chronos}{$date_part}{$prefix_vars{'t_hour'}}{host}{$conn_received{$t_pid}}++;
delete $conn_received{$t_pid};
}
return;
}
# Store session duration
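# Illustrative (hypothetical) LOG message matched below:
#   disconnection: session time: 0:14:21.563 user=alice database=mydb host=192.168.1.10
# The h:m:s.ms session time is converted to milliseconds before being summed.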
if (($prefix_vars{'t_loglevel'} eq 'LOG')
&& ($prefix_vars{'t_query'} =~
/disconnection: session time: ([^\s]+) user=([^\s]+) database=([^\s]+) host=([^\s]+)/))
{
return if ($disable_session);
if ($extension eq 'tsung') {
$tsung_session{$prefix_vars{'t_pid'}}{disconnection}{date} = $prefix_vars{'t_timestamp'};
}
my $time = $1;
my $usr = $2;
my $db = $3;
my $host = $4;
if ($extension eq 'tsung') {
&store_tsung_session($prefix_vars{'t_pid'});
return;
}
# Store time in milliseconds
$time =~ /(\d+):(\d+):(\d+\.\d+)/;
$time = ($3 * 1000) + ($2 * 60 * 1000) + ($1 * 60 * 60 * 1000);
$session_info{count}++;
$session_info{duration} += $time;
$session_info{chronos}{$date_part}{$prefix_vars{'t_hour'}}{count}++;
$session_info{chronos}{$date_part}{$prefix_vars{'t_hour'}}{duration} += $time;
$session_info{database}{$db}{count}++;
$session_info{database}{$db}{duration} += $time;
$session_info{user}{$usr}{count}++;
$session_info{user}{$usr}{duration} += $time;
$session_info{host}{$host}{count}++;
$session_info{host}{$host}{duration} += $time;
return;
}
# Store autovacuum information
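# Illustrative (hypothetical) LOG message matched below:
#   automatic vacuum of table "mydb.public.orders": index scans: 1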
if (
($prefix_vars{'t_loglevel'} eq 'LOG')
&& ($prefix_vars{'t_query'} =~
/automatic vacuum of table "([^\s]+)": index scans: (\d+)/
)
)
{
return if ($disable_autovacuum);
$autovacuum_info{count}++;
$autovacuum_info{tables}{$1}{vacuums} += 1;
$autovacuum_info{tables}{$1}{idxscans} += $2;
$autovacuum_info{chronos}{$date_part}{$prefix_vars{'t_hour'}}{count}++;
$cur_info{$t_pid}{vacuum} = $1;
return;
}
if (
($prefix_vars{'t_loglevel'} eq 'LOG')
&& ($prefix_vars{'t_query'} =~
/automatic analyze of table "([^\s]+)"/
)
)
{
return if ($disable_autovacuum);
$autoanalyze_info{count}++;
$autoanalyze_info{tables}{$1}{analyzes} += 1;
$autoanalyze_info{chronos}{$date_part}{$prefix_vars{'t_hour'}}{count}++;
}
# Store checkpoint information
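# Illustrative (hypothetical) LOG message matched below:
#   checkpoint complete: wrote 3124 buffers (19.1%); 0 transaction log file(s) added, 0 removed, 3 recycled; write=12.574 s, sync=0.082 s, total=12.729 s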
if (
($prefix_vars{'t_loglevel'} eq 'LOG')
&& ($prefix_vars{'t_query'} =~
/checkpoint complete: wrote (\d+) buffers \(([^\)]+)\); (\d+) transaction log file\(s\) added, (\d+) removed, (\d+) recycled; write=([0-9\.]+) s, sync=([0-9\.]+) s, total=([0-9\.]+) s/
)
)
{
return if ($disable_checkpoint);
$checkpoint_info{wbuffer} += $1;
#$checkpoint_info{percent_wbuffer} += $2;
$checkpoint_info{file_added} += $3;
$checkpoint_info{file_removed} += $4;
$checkpoint_info{file_recycled} += $5;
$checkpoint_info{write} += $6;
$checkpoint_info{sync} += $7;
$checkpoint_info{total} += $8;
$checkpoint_info{chronos}{$date_part}{$prefix_vars{'t_hour'}}{wbuffer} += $1;
#$checkpoint_info{chronos}{$date_part}{$prefix_vars{'t_hour'}}{percent_wbuffer} += $2;
$checkpoint_info{chronos}{$date_part}{$prefix_vars{'t_hour'}}{file_added} += $3;
$checkpoint_info{chronos}{$date_part}{$prefix_vars{'t_hour'}}{file_removed} += $4;
$checkpoint_info{chronos}{$date_part}{$prefix_vars{'t_hour'}}{file_recycled} += $5;
$checkpoint_info{chronos}{$date_part}{$prefix_vars{'t_hour'}}{write} += $6;
$checkpoint_info{chronos}{$date_part}{$prefix_vars{'t_hour'}}{sync} += $7;
$checkpoint_info{chronos}{$date_part}{$prefix_vars{'t_hour'}}{total} += $8;
return;
}
if ( ($prefix_vars{'t_loglevel'} eq 'LOG')
&& ($prefix_vars{'t_query'} =~ /checkpoints are occurring too frequently \((\d+) seconds apart\)/))
{
return if ($disable_checkpoint);
$checkpoint_info{warning}++;
$checkpoint_info{warning_seconds} += $1;
$checkpoint_info{chronos}{$date_part}{$prefix_vars{'t_hour'}}{warning}++;
$checkpoint_info{chronos}{$date_part}{$prefix_vars{'t_hour'}}{warning_seconds} += $1;
return;
}
# Store restartpoint information
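# Illustrative (hypothetical) LOG message matched below:
#   restartpoint complete: wrote 7891 buffers (48.2%); write=35.432 s, sync=0.102 s, total=35.601 s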
if (
($prefix_vars{'t_loglevel'} eq 'LOG')
&& ($prefix_vars{'t_query'} =~
/restartpoint complete: wrote (\d+) buffers \(([^\)]+)\); write=([0-9\.]+) s, sync=([0-9\.]+) s, total=([0-9\.]+) s/
)
)
{
return if ($disable_checkpoint);
$restartpoint_info{wbuffer} += $1;
#$restartpoint_info{percent_wbuffer} += $2;
$restartpoint_info{write} += $3;
$restartpoint_info{sync} += $4;
$restartpoint_info{total} += $5;
$restartpoint_info{chronos}{$date_part}{$prefix_vars{'t_hour'}}{wbuffer} += $1;
#$restartpoint_info{chronos}{$date_part}{$prefix_vars{'t_hour'}}{percent_wbuffer} += $2;
$restartpoint_info{chronos}{$date_part}{$prefix_vars{'t_hour'}}{write} += $3;
$restartpoint_info{chronos}{$date_part}{$prefix_vars{'t_hour'}}{sync} += $4;
$restartpoint_info{chronos}{$date_part}{$prefix_vars{'t_hour'}}{total} += $5;
return;
}
# Store the detail of the error
if ($cur_info{$t_pid}{loglevel} =~ /WARNING|ERROR|FATAL|PANIC/) {
if ($prefix_vars{'t_loglevel'} =~ /(DETAIL|STATEMENT|CONTEXT|HINT)/) {
$cur_info{$t_pid}{"\L$1\E"} .= $prefix_vars{'t_query'};
return;
}
}
# Process current query following context
if ($cur_info{$t_pid}{query}) {
# Remove obsolete connection storage
delete $conn_received{$cur_info{$t_pid}{pid}};
# The query is complete but we are missing some debug/info/bind parameter logs
if ($cur_info{$t_pid}{loglevel} eq 'LOG') {
# Apply bind parameters if any
if (($prefix_vars{'t_loglevel'} eq 'DETAIL') && ($prefix_vars{'t_query'} =~ /parameters: (.*)/)) {
$cur_info{$t_pid}{parameters} = "$1";
# go look at other params
return;
}
}
# When we are ready to overwrite the last storage, add it to the global stats
if ( ($prefix_vars{'t_loglevel'} =~ /LOG|FATAL|PANIC|ERROR|WARNING|HINT/)
&& exists $cur_info{$t_pid}
&& (($format eq 'csv') || (!$prefix_vars{'t_session_line'} || ($prefix_vars{'t_session_line'} != $cur_info{$t_pid}{session})))
) {
&store_queries($t_pid);
delete $cur_info{$t_pid};
}
}
# Register the previous query storage into global statistics before starting to store the current query
if (exists $cur_info{$t_pid} && (!$prefix_vars{'t_session_line'} || ($prefix_vars{'t_session_line'} != $cur_info{$t_pid}{session}))) {
&store_queries($t_pid);
delete $cur_info{$t_pid};
}
# Log lines with duration only, generated by log_duration = on in postgresql.conf
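# Illustrative (hypothetical) example: a line ending in "duration: 0.156 ms"
# only records the duration, with no query text attached.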
if ($prefix_vars{'t_query'} =~ s/duration: ([0-9\.]+) ms$//s) {
$prefix_vars{'t_duration'} = $1;
$prefix_vars{'t_query'} = '';
&set_current_infos($t_pid);
return;
}
# Store info as a tsung session when the output file extension is tsung
if (($extension eq 'tsung') && !exists $tsung_session{$prefix_vars{'t_pid'}}{connection} && $prefix_vars{'t_dbname'}) {
$tsung_session{$prefix_vars{'t_pid'}}{connection}{database} = $prefix_vars{'t_dbname'};
$tsung_session{$prefix_vars{'t_pid'}}{connection}{user} = $prefix_vars{'t_dbuser'};
$tsung_session{$prefix_vars{'t_pid'}}{connection}{date} = $prefix_vars{'t_date'};
}
my $t_action = '';
# Store query duration generated by log_min_duration >= 0 in postgresql.conf
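# Illustrative (hypothetical) example of a matched line:
#   duration: 125.512 ms statement: SELECT * FROM orders WHERE id = 10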
if ($prefix_vars{'t_query'} =~ s/duration: ([0-9\.]+) ms (query|statement): //is) {
$prefix_vars{'t_duration'} = $1;
$t_action = $2;
# Log line with duration and statement from prepared queries
} elsif ($prefix_vars{'t_query'} =~ s/duration: ([0-9\.]+) ms (prepare|parse|bind|execute|execute from fetch)\s+[^:]+:\s//is)
{
$prefix_vars{'t_duration'} = $1;
$t_action = $2;
# Skipping parse and bind logs
return if ($t_action !~ /query|statement|execute/);
# Log line without duration at all
} elsif ($prefix_vars{'t_query'} =~ s/(query|statement): //is) {
$t_action = $1;
# Log line without duration at all from prepared queries
} elsif ($prefix_vars{'t_query'} =~ s/(prepare|parse|bind|execute|execute from fetch)\s+[^:]+:\s//is)
{
$t_action = $1;
# Skipping parse and bind logs
return if ($t_action !~ /query|statement|execute/);
# Log lines that should not be parsed
} elsif ($prefix_vars{'t_loglevel'} eq 'LOG') {
if ($prefix_vars{'t_query'} !~
/incomplete startup packet|connection|receive|unexpected EOF|still waiting for [^\s]+Lock|checkpoint starting:|could not send data to client|parameter .*configuration file|autovacuum launcher|automatic (analyze|vacuum)|detected deadlock while waiting for|database system was shut down/
)
{
&logmsg('DEBUG', "Unrecognized line: $prefix_vars{'t_loglevel'}: $prefix_vars{'t_query'} at line $nlines");
}
if (exists $cur_info{$t_pid} && (!$prefix_vars{'t_session_line'} || ($prefix_vars{'t_session_line'} != $cur_info{$t_pid}{session}))) {
&store_queries($t_pid);
delete $cur_info{$t_pid};
}
return;
}
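# Illustrative LOG lines (values and queries are made up) for the branches above:
#   LOG:  duration: 10.491 ms  statement: SELECT * FROM foo;
#   LOG:  duration: 0.345 ms  execute my_stmt: SELECT * FROM foo WHERE id = $1;
#   LOG:  statement: BEGIN;
# parse/bind entries of prepared statements are skipped, and any other LOG line not
# matching the known informational patterns is only reported in debug mode.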
if ( ($format eq 'csv') && ($prefix_vars{'t_loglevel'} ne 'LOG')) {
$cur_info{$t_pid}{detail} = $prefix_vars{'t_detail'};
$cur_info{$t_pid}{hint} = $prefix_vars{'t_hint'};
$cur_info{$t_pid}{context} = $prefix_vars{'t_context'};
$cur_info{$t_pid}{statement} = $prefix_vars{'t_statement'};
}
&set_current_infos($t_pid);
return 1;
}
sub set_current_infos
{
my $t_pid = shift;
$cur_info{$t_pid}{year} = $prefix_vars{'t_year'};
$cur_info{$t_pid}{month} = $prefix_vars{'t_month'};
$cur_info{$t_pid}{day} = $prefix_vars{'t_day'};
$cur_info{$t_pid}{hour} = $prefix_vars{'t_hour'};
$cur_info{$t_pid}{min} = $prefix_vars{'t_min'};
$cur_info{$t_pid}{sec} = $prefix_vars{'t_sec'};
$cur_info{$t_pid}{timestamp} = $prefix_vars{'t_timestamp'};
$cur_info{$t_pid}{ident} = $prefix_vars{'t_ident'};
$cur_info{$t_pid}{query} = $prefix_vars{'t_query'};
$cur_info{$t_pid}{duration} = $prefix_vars{'t_duration'};
$cur_info{$t_pid}{pid} = $prefix_vars{'t_pid'};
$cur_info{$t_pid}{session} = $prefix_vars{'t_session_line'};
$cur_info{$t_pid}{loglevel} = $prefix_vars{'t_loglevel'};
$cur_info{$t_pid}{dbname} = $prefix_vars{'t_dbname'};
$cur_info{$t_pid}{dbuser} = $prefix_vars{'t_dbuser'};
$cur_info{$t_pid}{dbclient} = $prefix_vars{'t_client'};
$cur_info{$t_pid}{dbappname} = $prefix_vars{'t_appname'};
$cur_info{$t_pid}{date} = $prefix_vars{'t_date'};
}
sub store_tsung_session
{
my $pid = shift;
return if ($#{$tsung_session{$pid}{dates}} < 0);
# Open filehandle
my $fh = new IO::File ">>$outfile";
if (not defined $fh) {
die "FATAL: can't write to $outfile, $!\n";
}
if ($pid) {
print $fh " \n";
if (exists $tsung_session{$pid}{connection}{database}) {
print $fh qq{
};
}
if ($#{$tsung_session{$pid}{dates}} >= 0) {
my $sec = 0;
if ($tsung_session{$pid}{connection}{date}) {
$sec = $tsung_session{$pid}{dates}[0] - $tsung_session{$pid}{connection}{date};
}
print $fh " \n" if ($sec > 0);
print $fh " \n";
for (my $i = 0 ; $i <= $#{$tsung_session{$pid}{queries}} ; $i++) {
$tsung_queries++;
$sec = 0;
if ($i > 0) {
$sec = $tsung_session{$pid}{dates}[$i] - $tsung_session{$pid}{dates}[$i - 1];
print $fh " \n" if ($sec > 0);
}
print $fh " \n";
}
print $fh " \n";
}
if ($#{$tsung_session{$pid}{dates}} >= 0) {
my $sec = $tsung_session{$pid}{disconnection}{date} - $tsung_session{$pid}{dates}[-1];
print $fh " \n" if ($sec > 0);
}
if (exists $tsung_session{$pid}{connection}{database}) {
print $fh " \n";
}
print $fh " \n\n";
delete $tsung_session{$pid};
}
$fh->close;
}
sub store_queries
{
my $t_pid = shift;
# Remove comments if required
if ($remove_comment) {
$cur_info{$t_pid}{query} =~ s/\/\*(.*?)\*\///gs;
}
# Cleanup and normalize the current query
$cur_info{$t_pid}{query} =~ s/^[\t\s\r\n]+//s;
$cur_info{$t_pid}{query} =~ s/[\t\s\r\n;]+$//s;
# Replace bind parameters values in the query if any
if (exists $cur_info{$t_pid}{parameters}) {
my @t_res = split(/[,\s]*\$(\d+)\s=\s/, $cur_info{$t_pid}{parameters});
shift(@t_res);
for (my $i = 0 ; $i < $#t_res ; $i += 2) {
$cur_info{$t_pid}{query} =~ s/\$$t_res[$i]\b/$t_res[$i+1]/s;
}
}
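# Illustration (made-up values): with parameters "$1 = 'abc', $2 = '42'" the query
#   SELECT * FROM t WHERE a = $1 AND b = $2
# becomes
#   SELECT * FROM t WHERE a = 'abc' AND b = '42'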
# Only stored entries that have a query are processed here
if ($cur_info{$t_pid}{query}) {
# Keep only SELECT queries when requested
if ($select_only) {
return if (($cur_info{$t_pid}{query} !~ /^SELECT/is) || ($cur_info{$t_pid}{query} =~ /FOR UPDATE/is));
}
# Exclude queries matching one of the user-supplied exclusion patterns, if any
if ($#exclude_query >= 0) {
foreach (@exclude_query) {
if ($cur_info{$t_pid}{query} =~ /$_/i) {
$cur_info{$t_pid}{query} = '';
return;
}
}
}
# Keep only queries matching the user-supplied inclusion patterns, if any
if ($#include_query >= 0) {
foreach (@include_query) {
if ($cur_info{$t_pid}{query} !~ /$_/i) {
$cur_info{$t_pid}{query} = '';
return;
}
}
}
# Truncate the query if requested by the user
$cur_info{$t_pid}{query} = substr($cur_info{$t_pid}{query}, 0, $maxlength) . '[...]'
if (($maxlength > 0) && (length($cur_info{$t_pid}{query}) > $maxlength));
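# Illustration: with --maxlength 10 (made-up value), a 40-character query is reduced to
# its first 10 characters followed by the literal marker '[...]'.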
# Dump queries as tsung request and return
if ($extension eq 'tsung') {
if ($cur_info{$t_pid}{loglevel} eq 'LOG') {
push(@{$tsung_session{$t_pid}{queries}}, $cur_info{$t_pid}{query});
push(@{$tsung_session{$t_pid}{dates}}, $cur_info{$t_pid}{date});
if (!exists $tsung_session{$t_pid}{connection} && $cur_info{$t_pid}{dbname}) {
$tsung_session{$t_pid}{connection}{database} = $cur_info{$t_pid}{dbname};
$tsung_session{$t_pid}{connection}{user} = $cur_info{$t_pid}{dbuser};
$tsung_session{$t_pid}{connection}{date} = $cur_info{$t_pid}{date};
}
}
return;
}
}
my $cur_day_str = "$cur_info{$t_pid}{year}$cur_info{$t_pid}{month}$cur_info{$t_pid}{day}";
my $cur_hour_str = "$cur_info{$t_pid}{hour}";
# Store the collected information into global statistics
if ($cur_info{$t_pid}{loglevel} =~ /WARNING|ERROR|FATAL|PANIC|HINT/) {
# Add log level at beginning of the query and normalize it
$cur_info{$t_pid}{query} = $cur_info{$t_pid}{loglevel} . ": " . $cur_info{$t_pid}{query};
my $normalized_error = &normalize_error($cur_info{$t_pid}{query});
# Stores total and normalized error count
$overall_stat{'errors_number'}++;
$overall_stat{'unique_normalized_errors'}{"$normalized_error"}++;
$error_info{$normalized_error}{count}++;
# Stores normalized error count per time
$error_info{$normalized_error}{chronos}{"$cur_day_str"}{"$cur_hour_str"}{count}++;
# Stores normalized query samples
my $cur_last_log_timestamp = "$cur_info{$t_pid}{year}-$cur_info{$t_pid}{month}-$cur_info{$t_pid}{day} " .
"$cur_info{$t_pid}{hour}:$cur_info{$t_pid}{min}:$cur_info{$t_pid}{sec}";
&set_top_error_sample(
$normalized_error, $cur_last_log_timestamp, $cur_info{$t_pid}{query}, $cur_info{$t_pid}{detail},
$cur_info{$t_pid}{context}, $cur_info{$t_pid}{statement}, $cur_info{$t_pid}{hint}, $cur_info{$t_pid}{dbname}
);
} elsif ($cur_info{$t_pid}{loglevel} eq 'LOG') {
# Stores global statistics
$overall_stat{'queries_number'}++;
$overall_stat{'queries_duration'} += $cur_info{$t_pid}{duration} if ($cur_info{$t_pid}{duration});
my $cur_last_log_timestamp = "$cur_info{$t_pid}{year}-$cur_info{$t_pid}{month}-$cur_info{$t_pid}{day} " .
"$cur_info{$t_pid}{hour}:$cur_info{$t_pid}{min}:$cur_info{$t_pid}{sec}";
if (!$overall_stat{'first_query_ts'} || ($overall_stat{'first_query_ts'} gt $cur_last_log_timestamp)) {
$overall_stat{'first_query_ts'} = $cur_last_log_timestamp;
}
if (!$overall_stat{'last_query_ts'} || ($overall_stat{'last_query_ts'} lt $cur_last_log_timestamp)) {
$overall_stat{'last_query_ts'} = $cur_last_log_timestamp;
}
$overall_stat{'query_peak'}{$cur_last_log_timestamp}++;
$per_hour_info{"$cur_day_str"}{"$cur_hour_str"}{count}++;
if ($cur_info{$t_pid}{duration}) {
$per_hour_info{"$cur_day_str"}{"$cur_hour_str"}{duration} += $cur_info{$t_pid}{duration};
# Store min / max duration
if (!exists $per_hour_info{"$cur_day_str"}{"$cur_hour_str"}{min} || ($per_hour_info{"$cur_day_str"}{"$cur_hour_str"}{min} > $cur_info{$t_pid}{duration})) {
$per_hour_info{"$cur_day_str"}{"$cur_hour_str"}{min} = $cur_info{$t_pid}{duration};
}
if (!exists $per_hour_info{"$cur_day_str"}{"$cur_hour_str"}{max} || ($per_hour_info{"$cur_day_str"}{"$cur_hour_str"}{max} < $cur_info{$t_pid}{duration})) {
$per_hour_info{"$cur_day_str"}{"$cur_hour_str"}{max} = $cur_info{$t_pid}{duration};
}
}
if ($graph) {
$per_minute_info{query}{"$cur_day_str"}{"$cur_hour_str"}{$cur_info{$t_pid}{min}}{count}++;
$per_minute_info{query}{"$cur_day_str"}{"$cur_hour_str"}{$cur_info{$t_pid}{min}}{second}{$cur_info{$t_pid}{sec}}++;
$per_minute_info{query}{"$cur_day_str"}{"$cur_hour_str"}{$cur_info{$t_pid}{min}}{duration} += $cur_info{$t_pid}{duration} if ($cur_info{$t_pid}{duration});
}
# Counter per database and application name
if ($cur_info{$t_pid}{dbname}) {
$database_info{$cur_info{$t_pid}{dbname}}{count}++;
}
if ($cur_info{$t_pid}{dbappname}) {
$application_info{$cur_info{$t_pid}{dbappname}}{count}++;
} else {
$application_info{others}{count}++;
}
# Store normalized query temp file size if required
if (exists $cur_temp_info{$t_pid} && ($cur_temp_info{$t_pid} ne '') ) {
# Add a semicolon at the end of the query if it is missing
$cur_temp_info{$t_pid}{query} .= ';' if (substr($cur_temp_info{$t_pid}{query}, -1, 1) ne ';');
# Normalize query
my $normalized = &normalize_query($cur_temp_info{$t_pid}{query});
$normalyzed_info{$normalized}{tempfiles}{size} += $cur_temp_info{$t_pid}{size};
$normalyzed_info{$normalized}{tempfiles}{count}++;
if ($normalyzed_info{$normalized}{tempfiles}{maxsize} < $cur_temp_info{$t_pid}{size}) {
$normalyzed_info{$normalized}{tempfiles}{maxsize} = $cur_temp_info{$t_pid}{size};
}
if (!exists($normalyzed_info{$normalized}{tempfiles}{minsize})
|| $normalyzed_info{$normalized}{tempfiles}{minsize} > $cur_temp_info{$t_pid}{size}) {
$normalyzed_info{$normalized}{tempfiles}{minsize} = $cur_temp_info{$t_pid}{size};
}
&set_top_tempfile_info($cur_temp_info{$t_pid}{query}, $cur_temp_info{$t_pid}{size}, $cur_temp_info{$t_pid}{timestamp}, $cur_temp_info{$t_pid}{dbname}, $cur_temp_info{$t_pid}{dbuser}, $cur_temp_info{$t_pid}{dbclient}, $cur_temp_info{$t_pid}{dbappname});
delete $cur_temp_info{$t_pid};
}
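# Illustration (made-up sizes): if the same normalized query created temporary files of
# 2MB and then 5MB, the aggregate becomes count=2, size=7MB, minsize=2MB, maxsize=5MB.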
# Store normalized query that waited the most if required
if (exists $cur_lock_info{$t_pid}) {
# Add a semicolon at the end of the query if it is missing
$cur_lock_info{$t_pid}{query} .= ';' if (substr($cur_lock_info{$t_pid}{query}, -1, 1) ne ';');
# Normalize query
my $normalized = &normalize_query($cur_lock_info{$t_pid}{query});
$normalyzed_info{$normalized}{locks}{wait} += $cur_lock_info{$t_pid}{wait};
$normalyzed_info{$normalized}{locks}{count}++;
if ($normalyzed_info{$normalized}{locks}{maxwait} < $cur_lock_info{$t_pid}{wait}) {
$normalyzed_info{$normalized}{locks}{maxwait} = $cur_lock_info{$t_pid}{wait};
}
if (!exists($normalyzed_info{$normalized}{locks}{minwait})
|| $normalyzed_info{$normalized}{locks}{minwait} > $cur_lock_info{$t_pid}{wait}) {
$normalyzed_info{$normalized}{locks}{minwait} = $cur_lock_info{$t_pid}{wait};
}
&set_top_locked_info($cur_lock_info{$t_pid}{query}, $cur_lock_info{$t_pid}{wait}, $cur_lock_info{$t_pid}{timestamp}, $cur_lock_info{$t_pid}{dbname}, $cur_lock_info{$t_pid}{dbuser}, $cur_lock_info{$t_pid}{dbclient}, $cur_lock_info{$t_pid}{dbappname});
delete $cur_lock_info{$t_pid};
}
if ($cur_info{$t_pid}{query}) {
# Add a semicolon at the end of the query if it is missing
$cur_info{$t_pid}{query} .= ';' if (substr($cur_info{$t_pid}{query}, -1, 1) ne ';');
# Normalize query
my $normalized = &normalize_query($cur_info{$t_pid}{query});
foreach my $act (@action_regex) {
if ($normalized =~ $act) {
my $action = uc($1);
$overall_stat{$action}++;
$per_hour_info{"$cur_day_str"}{"$cur_hour_str"}{$action}{count}++;
$per_hour_info{"$cur_day_str"}{"$cur_hour_str"}{$action}{duration} += $cur_info{$t_pid}{duration} if ($cur_info{$t_pid}{duration});
if ($cur_info{$t_pid}{dbname}) {
$database_info{$cur_info{$t_pid}{dbname}}{$action}++;
}
if ($cur_info{$t_pid}{dbappname}) {
$application_info{$cur_info{$t_pid}{dbappname}}{$action}++;
} else {
$application_info{others}{$action}++;
}
last;
}
}
# Store normalized query count
$normalyzed_info{$normalized}{count}++;
# Store normalized query count and duration per time
$normalyzed_info{$normalized}{chronos}{"$cur_day_str"}{"$cur_hour_str"}{count}++;
if ($cur_info{$t_pid}{duration}) {
# Update the top slowest queries statistics
&set_top_slowest($cur_info{$t_pid}{query}, $cur_info{$t_pid}{duration}, $cur_last_log_timestamp, $cur_info{$t_pid}{dbname}, $cur_info{$t_pid}{dbuser}, $cur_info{$t_pid}{dbclient},$cur_info{$t_pid}{dbappname});
# Store normalized query total duration
$normalyzed_info{$normalized}{duration} += $cur_info{$t_pid}{duration};
# Store min / max duration
if (!exists $normalyzed_info{$normalized}{min} || ($normalyzed_info{$normalized}{min} > $cur_info{$t_pid}{duration})) {
$normalyzed_info{$normalized}{min} = $cur_info{$t_pid}{duration};
}
if (!exists $normalyzed_info{$normalized}{max} || ($normalyzed_info{$normalized}{max} < $cur_info{$t_pid}{duration})) {
$normalyzed_info{$normalized}{max} = $cur_info{$t_pid}{duration};
}
# Store normalized query count and duration per time
$normalyzed_info{$normalized}{chronos}{"$cur_day_str"}{"$cur_hour_str"}{duration} += $cur_info{$t_pid}{duration};
# Store normalized query samples
&set_top_sample($normalized, $cur_info{$t_pid}{query}, $cur_info{$t_pid}{duration}, $overall_stat{'last_log_ts'},$cur_info{$t_pid}{dbname}, $cur_info{$t_pid}{dbuser}, $cur_info{$t_pid}{dbclient},$cur_info{$t_pid}{dbappname});
}
}
}
}
# Normalize error messages
sub normalize_error
{
my $orig_query = shift;
return if (!$orig_query);
# Remove character position
$orig_query =~ s/ at character \d+//;
# Remove encoding detail
$orig_query =~ s/(byte sequence for encoding).*/$1/;
# Replace changing parameter by ...
$orig_query =~ s/"[^"]*"/"..."/g;
$orig_query =~ s/\(.*\)/\(...\)/g;
$orig_query =~ s/column .* does not exist/column "..." does not exist/;
$orig_query =~ s/(database system was shut down at).*/$1 .../;
# More normalization rules could be added here
return $orig_query;
}
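# Illustration (message text is made up): the errors
#   ERROR: duplicate key value violates unique constraint "foo_pkey"
#   ERROR: duplicate key value violates unique constraint "bar_pkey"
# both normalize to
#   ERROR: duplicate key value violates unique constraint "..."
# and are therefore counted as a single error class.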
sub average_per_minutes
{
my $val = shift;
my $idx = shift;
my @avgs = ();
for (my $i = 0 ; $i < 59 ; $i += $idx) {
push(@avgs, sprintf("%02d", $i));
}
push(@avgs, 59);
for (my $i = 0 ; $i <= $#avgs ; $i++) {
if ($val == $avgs[$i]) {
return "$avgs[$i]";
} elsif ($avgs[$i] == $avgs[-1]) {
return "$avgs[$i-1]";
} elsif (($val > $avgs[$i]) && ($val < $avgs[$i + 1])) {
return "$avgs[$i]";
}
}
return $val;
}
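# Illustration: with an interval of 10 minutes the buckets are 00, 10, 20, 30, 40, 50, 59
# and a value of 37, for example, falls into bucket "30".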
sub autodetect_format
{
my $file = shift;
# Open log file for reading
my $nfound = 0;
my $nline = 0;
my $fmt = '';
die "FATAL: can't open file $file, $!\n" unless(open(TESTFILE, $file));
binmode(TESTFILE);
my $fltf = <TESTFILE>;
close(TESTFILE);
# Is the file in binary format?
if ( $fltf =~ /^pst\d/ ) {
$fmt = 'binary';
}
else { # try to detect syslogs or csv
my ($tfile, $totalsize) = &get_log_file($file);
my %ident_name = ();
while (my $line = <$tfile>) {
chomp($line);
$line =~ s/\r//;
next if (!$line);
$nline++;
# Is this a syslog line?
if ($line =~
/^[A-Z][a-z]{2}\s+\d+\s\d+:\d+:\d+(?:\s[^\s]+)?\s[^\s]+\s([^\s\[]+)\[\d+\]:(?:\s\[[^\]]+\])?\s\[\d+\-\d+\].*?(LOG|WARNING|ERROR|FATAL|PANIC|DETAIL|STATEMENT|HINT|CONTEXT):/
)
{
$fmt = 'syslog';
$nfound++;
$ident_name{$1}++;
} elsif ($line =~
/^\d+-\d+-\d+T\d+:\d+:\d+(?:.[^\s]+)?\s[^\s]+\s([^\s\[]+)\[\d+\]:(?:\s\[[^\]]+\])?\s\[\d+\-\d+\].*?(LOG|WARNING|ERROR|FATAL|PANIC|DETAIL|STATEMENT|HINT|CONTEXT):/
)
{
$fmt = 'syslog2';
$nfound++;
$ident_name{$1}++;
# Is this a csvlog or stderr line?
} elsif (
(
$line =~
/^\d+-\d+-\d+ \d+:\d+:\d+\.\d+(?: [A-Z\d]{3,6})?,.*,(LOG|WARNING|ERROR|FATAL|PANIC|DETAIL|STATEMENT|HINT|CONTEXT),/
)
&& ($line =~ tr/,/,/ >= 12)
)
{
$fmt = 'csv';
$nfound++;
} elsif ($line =~
/\d+-\d+-\d+ \d+:\d+:\d+[\.0-9]*(?: [A-Z\d]{3,6})?(.*?)(LOG|WARNING|ERROR|FATAL|PANIC|DETAIL|STATEMENT|HINT|CONTEXT):\s+/
)
{
$fmt = 'stderr';
$nfound++;
}
last if (($nfound > 10) || ($nline > 5000));
}
$tfile->close();
if (!$fmt || ($nfound < 10)) {
die "FATAL: unable to detect log file format from $file, please use -f option.\n";
}
if (($fmt =~ /syslog/) && !$ident && (scalar keys %ident_name == 1)) {
$ident = (keys %ident_name)[0];
}
}
&logmsg('DEBUG', "Autodetected log format '$fmt' from $file");
return $fmt;
}
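# Illustrative first lines for each detected format (hosts, pids and dates are made up):
#   syslog : May  3 10:15:42 db1 postgres[4242]: [2-1] LOG:  connection received: host=[local]
#   syslog2: 2013-05-03T10:15:42+02:00 db1 postgres[4242]: [2-1] LOG:  connection received: host=[local]
#   stderr : 2013-05-03 10:15:42 CEST LOG:  connection received: host=[local]
# csvlog is recognized by its comma-separated layout with at least 12 commas per line.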
sub progress_bar
{
my ($got, $total, $width, $char, $queries, $errors) = @_;
$width ||= 25;
$char ||= '=';
my $num_width = length $total;
if ($extension eq 'tsung') {
sprintf(
"[%-${width}s] Parsed %${num_width}s bytes of %s (%.2f%%), queries: %d\r",
$char x (($width - 1) * $got / $total) . '>',
$got, $total, 100 * $got / +$total, ($queries || $tsung_queries)
);
} elsif($format eq 'binary') {
my $file = $_[-1];
sprintf(
"Loaded %d queries and %d events from binary file %s...\r",
$overall_stat{'queries_number'}, $overall_stat{'errors_number'}, $queries
);
} else {
sprintf(
"[%-${width}s] Parsed %${num_width}s bytes of %s (%.2f%%), queries: %d, events: %d\r",
$char x (($width - 1) * $got / $total) . '>',
$got, $total, 100 * $got / +$total, ($queries || $overall_stat{'queries_number'}), ($errors || $overall_stat{'errors_number'})
);
}
}
sub flotr2_graph
{
my ($buttonid, $divid, $data1, $data2, $data3, $title, $ytitle, $legend1, $legend2, $legend3, $ytitle2, $data4, $legend4) = @_;
$data1 = "var d1 = [$data1];" if ($data1);
$data2 = "var d2 = [$data2];" if ($data2);
$data3 = "var d3 = [$data3];" if ($data3);
$data4 = "var d4 = [$data4];" if ($data4);
$legend1 = "{ data: d1, label: \"$legend1\" }," if ($legend1);
$legend2 = "{ data: d2, label: \"$legend2\" }," if ($legend2);
$legend3 = "{ data: d3, label: \"$legend3\" }," if ($legend3);
$legend4 = "{ data: d4, label: \"$legend4\",yaxis: 2 }," if ($legend4);
my $yaxis2 = '';
if ($ytitle2) {
$yaxis2 = "y2axis: { title: \"$ytitle2\", min: 0, color: \"#4DA74D\" },";
}
my $min = $t_min;
my $max = $t_max;
if ($divid !~ /persecond/) {
$min = $t_min_hour;
$max = $t_max_hour;
}
# (flotr2 graph JavaScript/HTML template omitted)
print $fh <<"EOF";
EOF
}
sub flotr2_piegraph
{
my ($buttonid, $divid, $title, %data) = @_;
my @datadef = ();
my @contdef = ();
my $i = 1;
foreach my $k (sort keys %data) {
push(@datadef, "var d$i = [ [0,$data{$k}] ];\n");
push(@contdef, "{ data: d$i, label: \"$k\" },\n");
$i++;
}
# (flotr2 pie graph JavaScript/HTML template omitted)
print $fh <<"EOF";
EOF
}
sub build_log_line_prefix_regex
{
my %regex_map = (
'%a' => [('t_appname', '([0-9a-zA-Z\.\-\_\/\[\]]*)')], # application name
'%u' => [('t_dbuser', '([0-9a-zA-Z\_\[\]\-]*)')], # user name
'%d' => [('t_dbname', '([0-9a-zA-Z\_\[\]\-]*)')], # database name
'%r' => [('t_hostport', '([a-zA-Z0-9\-\.]+|\[local\]|\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})?[\(\d\)]*')], # remote host and port
'%h' => [('t_client', '([a-zA-Z0-9\-\.]+|\[local\]|\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})?')], # remote host
'%p' => [('t_pid', '(\d+)')], # process ID
'%t' => [('t_timestamp', '(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})(?: [A-Z\d]{3,6})?')], # timestamp without milliseconds
'%m' => [('t_mtimestamp', '(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})\.\d+(?: [A-Z\d]{3,6})?')], # timestamp with milliseconds
'%l' => [('t_session_line', '(\d+)')], # session line number
'%s' => [('t_session_timestamp', '(\d{4}-\d{2}-\d{2} \d{2}):\d{2}:\d{2}(?: [A-Z\d]{3,6})?')], # session start timestamp
'%c' => [('t_session_id', '([0-9a-f\.]*)')], # session ID
'%v' => [('t_virtual_xid', '([0-9a-f\.\/]*)')], # virtual transaction ID
'%x' => [('t_xid', '([0-9a-f\.\/]*)')], # transaction ID
'%i' => [('t_command', '([0-9a-zA-Z\.\-\_]*)')], # command tag
'%e' => [('t_sqlstate', '([0-9a-zA-Z]+)')], # SQL state
);
my @param_list = ();
$log_line_prefix =~ s/([\[\]\|\(\)\{\}])/\\$1/g;
$log_line_prefix =~ s/\%l([^\d])\d+/\%l$1\\d\+/;
while ($log_line_prefix =~ s/(\%[audrhptmlscvxie])/$regex_map{"$1"}->[1]/) {
push(@param_list, $regex_map{"$1"}->[0]);
}
# replace %% by a single %
$log_line_prefix =~ s/\%\%/\%/;
return @param_list;
}
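# Illustration (a hypothetical prefix): with log_line_prefix = '%t [%p]: [%l-1] user=%u,db=%d '
# the loop above returns the parameter list
#   (t_timestamp, t_pid, t_session_line, t_dbuser, t_dbname)
# and turns $log_line_prefix into the matching capturing regular expression.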
# Inclusion of Perl package SQL::Beautify
# Copyright (C) 2009 by Jonas Kramer
# Published under the terms of the Artistic License 2.0.
{
package SQL::Beautify;
use strict;
use warnings;
our $VERSION = 0.04;
use Carp;
# Keywords from SQL-92, SQL-99 and SQL-2003.
use constant KEYWORDS => qw(
ABSOLUTE ACTION ADD AFTER ALL ALLOCATE ALTER AND ANY ARE ARRAY AS ASC
ASENSITIVE ASSERTION ASYMMETRIC AT ATOMIC AUTHORIZATION AVG BEFORE BEGIN
BETWEEN BIGINT BINARY BIT BIT_LENGTH BLOB BOOLEAN BOTH BREADTH BY CALL
CALLED CASCADE CASCADED CASE CAST CATALOG CHAR CHARACTER CHARACTER_LENGTH
CHAR_LENGTH CHECK CLOB CLOSE COALESCE COLLATE COLLATION COLUMN COMMIT
CONDITION CONNECT CONNECTION CONSTRAINT CONSTRAINTS CONSTRUCTOR CONTAINS
CONTINUE CONVERT CORRESPONDING COUNT CREATE CROSS CUBE CURRENT CURRENT_DATE
CURRENT_DEFAULT_TRANSFORM_GROUP CURRENT_PATH CURRENT_ROLE CURRENT_TIME
CURRENT_TIMESTAMP CURRENT_TRANSFORM_GROUP_FOR_TYPE CURRENT_USER CURSOR
CYCLE DATA DATE DAY DEALLOCATE DEC DECIMAL DECLARE DEFAULT DEFERRABLE
DEFERRED DELETE DEPTH DEREF DESC DESCRIBE DESCRIPTOR DETERMINISTIC
DIAGNOSTICS DISCONNECT DISTINCT DO DOMAIN DOUBLE DROP DYNAMIC EACH ELEMENT
ELSE ELSEIF END EPOCH EQUALS ESCAPE EXCEPT EXCEPTION EXEC EXECUTE EXISTS
EXIT EXTERNAL EXTRACT FALSE FETCH FILTER FIRST FLOAT FOR FOREIGN FOUND FREE
FROM FULL FUNCTION GENERAL GET GLOBAL GO GOTO GRANT GROUP GROUPING HANDLER
HAVING HOLD HOUR IDENTITY IF IMMEDIATE IN INDICATOR INITIALLY INNER INOUT
INPUT INSENSITIVE INSERT INT INTEGER INTERSECT INTERVAL INTO IS ISOLATION
ITERATE JOIN KEY LANGUAGE LARGE LAST LATERAL LEADING LEAVE LEFT LEVEL LIKE
LIMIT LOCAL LOCALTIME LOCALTIMESTAMP LOCATOR LOOP LOWER MAP MATCH MAX
MEMBER MERGE METHOD MIN MINUTE MODIFIES MODULE MONTH MULTISET NAMES
NATIONAL NATURAL NCHAR NCLOB NEW NEXT NO NONE NOT NULL NULLIF NUMERIC
OBJECT OCTET_LENGTH OF OLD ON ONLY OPEN OPTION OR ORDER ORDINALITY OUT
OUTER OUTPUT OVER OVERLAPS PAD PARAMETER PARTIAL PARTITION PATH POSITION
PRECISION PREPARE PRESERVE PRIMARY PRIOR PRIVILEGES PROCEDURE PUBLIC RANGE
READ READS REAL RECURSIVE REF REFERENCES REFERENCING RELATIVE RELEASE
REPEAT RESIGNAL RESTRICT RESULT RETURN RETURNS REVOKE RIGHT ROLE ROLLBACK
ROLLUP ROUTINE ROW ROWS SAVEPOINT SCHEMA SCOPE SCROLL SEARCH SECOND SECTION
SELECT SENSITIVE SESSION SESSION_USER SET SETS SIGNAL SIMILAR SIZE SMALLINT
SOME SPACE SPECIFIC SPECIFICTYPE SQL SQLCODE SQLERROR SQLEXCEPTION SQLSTATE
SQLWARNING START STATE STATIC SUBMULTISET SUBSTRING SUM SYMMETRIC SYSTEM
SYSTEM_USER TABLE TABLESAMPLE TEMPORARY TEXT THEN TIME TIMESTAMP
TIMEZONE_HOUR TIMEZONE_MINUTE TINYINT TO TRAILING TRANSACTION TRANSLATE
TRANSLATION TREAT TRIGGER TRIM TRUE UNDER UNDO UNION UNIQUE UNKNOWN UNNEST
UNTIL UPDATE UPPER USAGE USER USING VALUE VALUES VARCHAR VARYING VIEW WHEN
WHENEVER WHERE WHILE WINDOW WITH WITHIN WITHOUT WORK WRITE YEAR ZONE
);
sub tokenize_sql
{
my ($query, $remove_white_tokens) = @_;
my $re = qr{
(
(?:--|\#)[\ \t\S]* # single line comments
|
(?:<>|<=>|>=|<=|==|=|!=|!|<<|>>|<|>|\|\||\||&&|&|-|\+|\*(?!/)|/(?!\*)|\%|~|\^|\?)
# operators and tests
|
[\[\]\(\),;.] # punctuation (parenthesis, comma)
|
\'\'(?!\') # empty single quoted string
|
\"\"(?!\"") # empty double quoted string
|
"(?>(?:(?>[^"\\]+)|""|\\.)*)+"
# anything inside double quotes, ungreedy
|
`(?>(?:(?>[^`\\]+)|``|\\.)*)+`
# anything inside backticks quotes, ungreedy
|
'(?>(?:(?>[^'\\]+)|''|\\.)*)+'
# anything inside single quotes, ungreedy.
|
/\*[\ \t\r\n\S]*?\*/ # C style comments
|
(?:[\w:@]+(?:\.(?:\w+|\*)?)*)
# words, standard named placeholders, db.table.*, db.*
|
(?: \$_\$ | \$\d+ | \${1,2} )
# dollar expressions - eg $_$ $3 $$
|
\n # newline
|
[\t\ ]+ # any kind of white spaces
)
}smx;
my @query = ();
@query = $query =~ m{$re}smxg;
if ($remove_white_tokens) {
@query = grep(!/^[\s\n\r]*$/, @query);
}
return wantarray ? @query : \@query;
}
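# Illustration: tokenizing the (made-up) query
#   SELECT a, b FROM t WHERE name = 'x';
# with $remove_white_tokens set returns
#   ('SELECT', 'a', ',', 'b', 'FROM', 't', 'WHERE', 'name', '=', "'x'", ';')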
sub new
{
my ($class, %options) = @_;
my $self = bless {%options}, $class;
# Set some defaults.
$self->{query} = '' unless defined($self->{query});
$self->{spaces} = 4 unless defined($self->{spaces});
$self->{space} = ' ' unless defined($self->{space});
$self->{break} = "\n" unless defined($self->{break});
$self->{wrap} = {} unless defined($self->{wrap});
$self->{keywords} = [] unless defined($self->{keywords});
$self->{rules} = {} unless defined($self->{rules});
$self->{uc_keywords} = 0 unless defined $self->{uc_keywords};
push @{$self->{keywords}}, KEYWORDS;
# Initialize internal stuff.
$self->{_level} = 0;
return $self;
}
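# Minimal usage sketch (not part of the original code):
#   my $sql = SQL::Beautify->new(spaces => 4, uc_keywords => 1);
#   $sql->query("select * from pgbench_accounts where aid = 1");
#   my $pretty = $sql->beautify;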
# Add more SQL.
sub add
{
my ($self, $addendum) = @_;
$addendum =~ s/^\s*/ /;
$self->{query} .= $addendum;
}
# Set SQL to beautify.
sub query
{
my ($self, $query) = @_;
$self->{query} = $query if (defined($query));
return $self->{query};
}
# Beautify SQL.
sub beautify
{
my ($self) = @_;
$self->{_output} = '';
$self->{_level_stack} = [];
$self->{_new_line} = 1;
my $last = '';
$self->{_tokens} = [tokenize_sql($self->query, 1)];
while (defined(my $token = $self->_token)) {
my $rule = $self->_get_rule($token);
# Allow custom rules to override defaults.
if ($rule) {
$self->_process_rule($rule, $token);
}
elsif ($token eq '(') {
$self->_add_token($token);
$self->_new_line;
push @{$self->{_level_stack}}, $self->{_level};
$self->_over unless $last and uc($last) eq 'WHERE';
}
elsif ($token eq ')') {
# $self->_new_line;
$self->{_level} = pop(@{$self->{_level_stack}}) || 0;
$self->_add_token($token);
$self->_new_line if ($self->_next_token
and $self->_next_token !~ /^AS$/i
and $self->_next_token ne ')'
and $self->_next_token !~ /::/
and $self->_next_token ne ';'
);
}
elsif ($token eq ',') {
$self->_add_token($token);
$self->_new_line;
}
elsif ($token eq ';') {
$self->_add_token($token);
$self->_new_line;
# End of statement; remove all indentation.
@{$self->{_level_stack}} = ();
$self->{_level} = 0;
}
elsif ($token =~ /^(?:SELECT|FROM|WHERE|HAVING|BEGIN|SET)$/i) {
$self->_back if ($last and $last ne '(' and $last ne 'FOR');
$self->_new_line;
$self->_add_token($token);
$self->_new_line if ((($token ne 'SET') || $last) and $self->_next_token and $self->_next_token ne '(' and $self->_next_token ne ';');
$self->_over;
}
elsif ($token =~ /^(?:GROUP|ORDER|LIMIT)$/i) {
$self->_back;
$self->_new_line;
$self->_add_token($token);
}
elsif ($token =~ /^(?:BY)$/i) {
$self->_add_token($token);
$self->_new_line;
$self->_over;
}
elsif ($token =~ /^(?:CASE)$/i) {
$self->_add_token($token);
$self->_over;
}
elsif ($token =~ /^(?:WHEN)$/i) {
$self->_new_line;
$self->_add_token($token);
}
elsif ($token =~ /^(?:ELSE)$/i) {
$self->_new_line;
$self->_add_token($token);
}
elsif ($token =~ /^(?:END)$/i) {
$self->_back;
$self->_new_line;
$self->_add_token($token);
}
elsif ($token =~ /^(?:UNION|INTERSECT|EXCEPT)$/i) {
$self->_back unless $last and $last eq '(';
$self->_new_line;
$self->_add_token($token);
$self->_new_line if ($self->_next_token and $self->_next_token ne '(');
$self->_over;
}
elsif ($token =~ /^(?:LEFT|RIGHT|INNER|OUTER|CROSS)$/i) {
$self->_back;
$self->_new_line;
$self->_add_token($token);
$self->_over;
}
elsif ($token =~ /^(?:JOIN)$/i) {
if ($last and $last !~ /^(?:LEFT|RIGHT|INNER|OUTER|CROSS)$/) {
$self->_new_line;
}
$self->_add_token($token);
}
elsif ($token =~ /^(?:AND|OR)$/i) {
$self->_new_line;
$self->_add_token($token);
# $self->_new_line;
}
elsif ($token =~ /^--/) {
if (!$self->{no_comments}) {
$self->_add_token($token);
$self->_new_line;
}
}
elsif ($token =~ /^\/\*.*\*\/$/s) {
if (!$self->{no_comments}) {
$token =~ s/\n[\s\t]+\*/\n\*/gs;
$self->_new_line;
$self->_add_token($token);
$self->_new_line;
}
}
else {
$self->_add_token($token, $last);
}
$last = $token;
}
$self->_new_line;
$self->{_output};
}
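# Illustration: for the (made-up) input "select a, b from t where id = 1" the loop
# above produces roughly, with uc_keywords enabled:
#   SELECT
#       a,
#       b
#   FROM
#       t
#   WHERE
#       id = 1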
# Add a token to the beautified string.
sub _add_token
{
my ($self, $token, $last_token) = @_;
if ($self->{wrap}) {
my $wrap;
if ($self->_is_keyword($token)) {
$wrap = $self->{wrap}->{keywords};
} elsif ($self->_is_constant($token)) {
$wrap = $self->{wrap}->{constants};
}
if ($wrap) {
$token = $wrap->[0] . $token . $wrap->[1];
}
}
my $last_is_dot = defined($last_token) && $last_token eq '.';
if (!$self->_is_punctuation($token) and !$last_is_dot) {
$self->{_output} .= $self->_indent;
}
# uppercase keywords
$token = uc $token
if $self->_is_keyword($token)
and $self->{uc_keywords};
$self->{_output} .= $token;
# This can't be the beginning of a new line anymore.
$self->{_new_line} = 0;
}
# Increase the indentation level.
sub _over
{
my ($self) = @_;
++$self->{_level};
}
# Decrease the indentation level.
sub _back
{
my ($self) = @_;
--$self->{_level} if ($self->{_level} > 0);
}
# Return a string of spaces according to the current indentation level and the
# spaces setting for indenting.
sub _indent
{
my ($self) = @_;
if ($self->{_new_line}) {
return $self->{space} x ($self->{spaces} * $self->{_level});
} else {
return $self->{space};
}
}
# Add a line break, but make sure there are no empty lines.
sub _new_line
{
my ($self) = @_;
$self->{_output} .= $self->{break} unless ($self->{_new_line});
$self->{_new_line} = 1;
}
# Have a look at the token that's coming up next.
sub _next_token
{
my ($self) = @_;
return @{$self->{_tokens}} ? $self->{_tokens}->[0] : undef;
}
# Get the next token, removing it from the list of remaining tokens.
sub _token
{
my ($self) = @_;
return shift @{$self->{_tokens}};
}
# Check if a token is a known SQL keyword.
sub _is_keyword
{
my ($self, $token) = @_;
return ~~ grep {$_ eq uc($token)} @{$self->{keywords}};
}
# Add new keywords to highlight.
sub add_keywords
{
my $self = shift;
for my $keyword (@_) {
push @{$self->{keywords}}, ref($keyword) ? @{$keyword} : $keyword;
}
}
# Add new rules.
sub add_rule
{
my ($self, $format, $token) = @_;
my $rules = $self->{rules} ||= {};
my $group = $rules->{$format} ||= [];
push @{$group}, ref($token) ? @{$token} : $token;
}
# Find custom rule for a token.
sub _get_rule
{
my ($self, $token) = @_;
values %{$self->{rules}}; # Reset iterator.
while (my ($rule, $list) = each %{$self->{rules}}) {
return $rule if (grep {uc($token) eq uc($_)} @$list);
}
return undef;
}
sub _process_rule
{
my ($self, $rule, $token) = @_;
my $format = {
break => sub {$self->_new_line},
over => sub {$self->_over},
back => sub {$self->_back},
token => sub {$self->_add_token($token)},
push => sub {push @{$self->{_level_stack}}, $self->{_level}},
pop => sub {$self->{_level} = pop(@{$self->{_level_stack}}) || 0},
reset => sub {$self->{_level} = 0; @{$self->{_level_stack}} = ();},
};
for (split /-/, lc $rule) {
&{$format->{$_}} if ($format->{$_});
}
}
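# Illustration of a custom rule (hypothetical): after
#   $sql->add_rule('break-token-over', 'RETURNING');
# the beautifier starts a new line, emits the RETURNING keyword and then increases
# the indentation level every time that token is met.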
# Check if a token is a constant.
sub _is_constant
{
my ($self, $token) = @_;
return ($token =~ /^\d+$/ or $token =~ /^(['"`]).*\1$/);
}
# Check if a token is punctuation.
sub _is_punctuation
{
my ($self, $token) = @_;
return ($token =~ /^[,;.]$/);
}
}
sub get_log_file
{
my $logf = shift;
my $lfile = undef;
# get file size
my $totalsize = (stat("$logf"))[7] || 0;
# Open a file handle
if ($logf !~ /\.(gz|bz2|zip)/i) {
open($lfile, $logf) || die "FATAL: cannot read log file $logf. $!\n";
$totalsize = 0 if ($logf eq '-');
} else {
my $uncompress = $zcat;
if (($logf =~ /\.bz2/i) && ($zcat =~ /^$zcat_cmd$/)) {
$uncompress = $bzcat;
} elsif (($logf =~ /\.zip/i) && ($zcat =~ /^$zcat_cmd$/)) {
$uncompress = $ucat;
}
&logmsg('DEBUG', "Compressed log file, will use command: $uncompress \"$logf\"");
# Open a pipe to zcat program for compressed log
open($lfile,"$uncompress \"$logf\" |") || die "FATAL: cannot read from pipe to $uncompress \"$logf\". $!\n";
# Real size of the file is unknown, try to find it
# bz2 does not report real size
$totalsize = 0;
if ($logf =~ /\.(gz|zip)/i) {
my $cmd_file_size = $gzip_uncompress_size;
if ($logf =~ /\.zip/i) {
$cmd_file_size = $zip_uncompress_size;
}
$cmd_file_size =~ s/\%f/$logf/g;
$totalsize = `$cmd_file_size`;
chomp($totalsize);
}
if ($queue_size) {
$job_per_file = $queue_size;
$queue_size = 0;
}
}
# In list context returns the filehandle and the size of the file
if (wantarray()) {
return ($lfile, $totalsize);
}
# In scalar context return size only
close($lfile);
return $totalsize;
}
sub split_logfile
{
my $logf = shift;
# CSV file can't be parsed using multiprocessing
return (0, -1) if ( $format eq 'csv' );
# get file size
my $totalsize = (stat("$logf"))[7] || 0;
# Real size of the file is unknown, try to find it
# bz2 does not report real size
if ($logf =~ /\.(gz|zip)/i) {
$totalsize = 0;
my $cmd_file_size = $gzip_uncompress_size;
if ($logf =~ /\.zip/i) {
$cmd_file_size = $zip_uncompress_size;
}
$cmd_file_size =~ s/\%f/$logf/g;
$totalsize = `$cmd_file_size`;
chomp($totalsize);
if ($queue_size) {
$job_per_file = $queue_size;
$queue_size = 0;
}
} elsif ($logf =~ /\.bz2/i) {
$totalsize = 0;
}
return (0, -1) if (!$totalsize);
my @chunks = (0);
my $i = 1;
while ($i < $queue_size) {
push(@chunks, int(($totalsize/$queue_size) * $i));
$i++;
}
push(@chunks, $totalsize);
return @chunks;
}
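# Illustration (made-up sizes): for a 1000-byte plain-text log with a parallel queue
# size of 4 (e.g. as set with --jobs), the returned offsets are (0, 250, 500, 750, 1000),
# one byte range per worker.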
__DATA__