pax_global_header00006660000000000000000000000064146751145220014521gustar00rootroot0000000000000052 comment=97cc5976f2afedfd1fcd7071833b0a6f5ddffee9 pg_qualstats-2.1.1/000077500000000000000000000000001467511452200142315ustar00rootroot00000000000000pg_qualstats-2.1.1/.github/000077500000000000000000000000001467511452200155715ustar00rootroot00000000000000pg_qualstats-2.1.1/.github/workflows/000077500000000000000000000000001467511452200176265ustar00rootroot00000000000000pg_qualstats-2.1.1/.github/workflows/powa_archivist.yml000066400000000000000000000017511467511452200233770ustar00rootroot00000000000000name: Trigger build and push of powa-archivist image on: push: # https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#filter-pattern-cheat-sheet tags-ignore: - 'debian/*' env: TARGET_REPO: "powa-podman" EVENT_TYPE: "powa-archivist" jobs: trigger_build: name: Trigger build and push of powa-archivist in powa-podman repo runs-on: ubuntu-latest steps: - name: Trigger the powa-archivist-git repository dispatch run: | # Set variables org="${{ github.repository_owner }}" repo="${{ env.TARGET_REPO }}" event_type="${{ env.EVENT_TYPE }}" curl -L \ -X POST \ -H "Accept: application/vnd.github+json" \ -H "Authorization: Bearer ${{ secrets.DISPATCH_TOKEN }}" \ -H "X-GitHub-Api-Version: 2022-11-28" \ https://api.github.com/repos/${org}/${repo}/dispatches \ -d "{\"event_type\": \"${event_type}\"}" pg_qualstats-2.1.1/.github/workflows/powa_archivist_git.yml000066400000000000000000000015611467511452200242410ustar00rootroot00000000000000name: Trigger build and push of powa-archivist-git image on: push: branches: [master] env: TARGET_REPO: "powa-podman" EVENT_TYPE: "powa-archivist-git" jobs: trigger_build: name: Trigger build and push of powa-archivist-git in powa-podman repo runs-on: ubuntu-latest steps: - name: Trigger the powa-archivist-git repository dispatch run: | # Set variables org="${{ github.repository_owner }}" repo="${{ env.TARGET_REPO }}" 
event_type="${{ env.EVENT_TYPE }}" curl -L \ -X POST \ -H "Accept: application/vnd.github+json" \ -H "Authorization: Bearer ${{ secrets.DISPATCH_TOKEN }}" \ -H "X-GitHub-Api-Version: 2022-11-28" \ https://api.github.com/repos/${org}/${repo}/dispatches \ -d "{\"event_type\": \"${event_type}\"}" pg_qualstats-2.1.1/.github/workflows/tests.yml000066400000000000000000000060131467511452200215130ustar00rootroot00000000000000name: Run pg_qualstats tests on: push: branches: - master pull_request: branches: - master env: DATADIR: /dev/shm/data LOGFILE: /dev/shm/data/logfile jobs: pg_qualstats_tests: name: pg_qualstats tests runs-on: ${{ matrix.os }} strategy: matrix: postgres_major_version: [ "12", "13", "14", "15", "16" ] os: ["ubuntu-22.04"] steps: - uses: actions/checkout@v3 - name: Set up prerequisites and environment run: | echo "************ CLEAN IMAGE ***********" sudo apt remove -y '^postgres.*' '^libpq.*' echo "" echo "********* REPOSITORY SET UP ********" sudo apt-get install -y wget gnupg sudo sh -c 'echo "deb http://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" > /etc/apt/sources.list.d/pgdg.list' wget --quiet -O - https://www.postgresql.org/media/keys/ACCC4CF8.asc | sudo apt-key add - sudo apt-get update -y -qq --fix-missing echo "" echo "*********** ENVIRONMENT ************" export PG_MAJOR_VERSION=${{ matrix.postgres_major_version }} echo "PG_MAJOR_VERSION=$PG_MAJOR_VERSION" >> $GITHUB_ENV echo "MAKEFLAGS=$MAKEFLAGS -j $(grep -c ^processor /proc/cpuinfo)" >> $GITHUB_ENV echo "" echo "******** INSTALL POSTGRES **********" sudo apt-get install -y \ postgresql-$PG_MAJOR_VERSION \ postgresql-server-dev-$PG_MAJOR_VERSION \ postgresql-contrib-$PG_MAJOR_VERSION echo "" echo "******* INSTALL DEPENDENCIES *******" sudo apt-get install -y \ gcc \ make \ build-essential \ pkg-config echo "" echo "********** READJUST PATH ***********" export PATH=$(pg_config --bindir):$PATH echo "PATH=$PATH" >> $GITHUB_ENV cat $GITHUB_ENV echo "" - name: Start a postgres 
${{ matrix.postgres_major_version }} server run: | sudo chmod a+rwx /var/run/postgresql/ pg_ctl -D $DATADIR initdb echo "shared_preload_libraries = 'pg_stat_statements'" >> $DATADIR/postgresql.conf pg_ctl -D $DATADIR -l $LOGFILE start || cat $LOGFILE psql -c 'select 1 as ok' postgres - name: Build and install pg_qualstats for postgres ${{ matrix.postgres_major_version }} run: | make sudo make install - name: Enable pg_qualstats on postgres ${{ matrix.postgres_major_version }} server run: | echo "shared_preload_libraries = 'pg_stat_statements,pg_qualstats'" >> $DATADIR/postgresql.conf pg_ctl -D $DATADIR -l $LOGFILE restart || cat $LOGFILE psql -c 'select 1 as ok' postgres - name: Run pg_qualstats tests for postgres ${{ matrix.postgres_major_version }} run: make installcheck || ( errcode=$?; cat regression.diffs && exit $errcode ) - name: Stop the running postgres ${{ matrix.postgres_major_version }} server run: pg_ctl -D $DATADIR stop pg_qualstats-2.1.1/.gitignore000066400000000000000000000000171467511452200162170ustar00rootroot00000000000000*.o *.so *.zip pg_qualstats-2.1.1/CHANGELOG000066400000000000000000000151721467511452200154510ustar00rootroot000000000000002.1.1 Miscellaneous: - Add support for PostgreSQL 17 (Georgy Shelkovy) Bugfix: - Fix memory leak (Ronan Dunklau) - Fix column order of multi-column index in the index advisor (Julien Rouhaud, per report from disqus user Sivan) 2.1.0 New feature: - Allow custom schema at extension creation time (Julien Rouhaud) - Report the queryid in the index advisor (Julien Rouhaud, thanks to Zhihong Yu for the request) Bugfix: - Mark the extension as not relocatable as it internally references the extension schema (Julien Rouhaud) 2.0.4 Bugfix: - Fix memory allocation for PostgreSQL 12 and above (Julien Rouhaud) - Fix bug when append plans are empty (Ronan Dunklau) Miscellaneous: - Fix a faulty strncpy() call (Julien Rouhaud, per report from github user RekGRpth) - Fix some cppcheck warnings (Julien Rouhaud, per report 
from github user RekGRpth) - Add compatibility with PostgreSQL 15 (Julien Rouhaud) - Improve extension ugprade documentation (Julien Rouhaud) - Make sure the code can be compiled and used on Windows (Julien Rouhaud, per report from github user suprimex) 2.0.3 Miscellaneous: - Add support for PostgreSQL 14 (Julien Rouhaud, per report from Michael Paquier) - Add extension update documentation (Julien Rouhaud) - Debian packaging improvements (Christoph Berg) 2.0.2 Bugfix: - Correctly handle different versions for the SQL objects and the shared library (Julien Rouhaud, per report from github users seqizz and akovac) 2.0.1 Bugfix: - Fix compilation for PG11- on some platforms (Julien Rouhaud, per report from Devrim Gündüz) 2.0.0 New features: - Handle statistics on selectivity estimation errors (Julien Rouhaud, per idea from Oleg Bartunov) - Add an index advisor feature (Julien Rouhaud) - Allow pgqs to work locally without being in shared_preload_libraries (Julien Rouhaud) Bugfix: - Fix handling of quals of the form Constant Operator Val where the operator doesn't have a commutator - Compute the same identifier whether the operator was commuted or not - Correctly discard all quals not referencing a relation - Fix (unique)qualid computation to only consider AND-ed quals - Don't expose recorded constants to unauthorized users Miscellaneous: - Add missing fields description (Julien Rouhaud) 1.0.9 Bugfix: - Fix constant value truncation when multibyte encoding characters are used (thanks to Gürkan Gür for the report) Miscellaneous: - Remove unneeded cast, that prevented compilation at least on Solaris 10 SPARC (thanks to github user edechaux for the report) 1.0.8 Miscellaneous: - Fix pg12 compatibility - Fix possible issue with array processing 1.0.7 Bugfix: - Fix a bug for queries having JOIN or WHERE predicates on foreign tables or custom scans (Julien Rouhaud). 
Thanks a lot to Andrej Urvantsev, Raymond Barbiero and mbroxson who all reported this issue, and especially to mbroxson who provided a reproducer! Miscellaneous: - Fix debian packaging to ignore debian/* tags (Christoph Berg) 1.0.6 Bugfix: - Fix bug for handling of nodes having multiple children, such as Append node (Julien Rouhaud). Miscellaneous: - Fix compilation issue with C90 compatibility (Julien Rouhaud) - Fix README.d installation in debian packaging (Thanks to Andreas Beckmann for the report) 1.0.5: Incompatibilites: - Due to changes in pg_stat_statements in 11, queryid is now on 64 bits. SQL functions no longer use oid type but bigint for queryid attribute (even for PG prior to 11). Miscellaneous: - Add pg 11 compatibility (Adrien Nayrat helped by Julien Rouhaud and Thomas Reiss) - Warn if incorrect configuration setting is used 1.0.4: - Fix a bug in Bitmap Index Scan nodes handling for PostgreSQL 10+ (Fix by Julien Rouhaud, thanks to Marc Cousin and Adrien Nayrat for reporting the issue) - Fix sampled array buffer overflow (Fix by Julien Rouhaud, reporting and much testing by Nicolas Gollet) 1.0.3: Bugfix: - Fix a missing call to InstrEndLoop (Tomas Vondra) - Sample all nested queries when top level statement is sampled (Julien Rouhaud) - Make sure hash keys can be compared using memcmp (Julien Rouhaud) - Fix behavior with parallel queries (Julien Rouhaud based on a patch by Tomas Vondra) - Fix bug on TEXTCONST not being byval (Ronan Dunklau) - Fix 64bits counters on pass-by-ref float8 architectures (Julien Rouhaud) - Fix bug in pg_qualstats_names (Ronan Dunklau) - Fix bug in const position (Julien Rouhaud) - Fix pg_qualstats_pretty to use text instead of regoper, allowing usage of pg_upgrade when pg_qualstats is installed (Julien Rouhaud) - Fix segfault when interleaved executors cause bad sampling detection (Julien Rouhaud, reported by Andreas Seltenreich) Miscellaneous: - Add pg 10 compatibility (Julien Rouhaud) - Do not install docs anymore (Ronan 
Dunklau) - Add missing occurences/nbfiltered fields to pg_qualstats_pretty and pg_qualstats_all views (Julien Rouhaud) 1.0.2: Bugfix - Fix infinite loop for queries having a huge number of WHERE or JOIN clauses 1.0.1: Bugfix - Fix impossibility to install pg_qualstats if intarray extension is installed 1.0.0: Incompatibilites: - RenameGUC from sample_ratio to sample_rate Bugfix: - only get the exclusive lock on shared memory when needed - Fix bugs related to outer var resolution - Add missing function prototype Miscellaneous: - Add 9.6 compatibility - Code and comment cleanup Thanks to Thomas Vondra and Julien Rouhaud ! 0.0.9: - add sample_ratio hook - fix mistake while releasing 0.0.8 0.0.8: - add sample_ratio parameter 0.0.7: - fix counters for 32 bits builds - handle different collations for constants sampling - add a new "occurences" field, displaying the number of qual call - keep a unnormalized query string for each queryid - fix a bug with operator id retrieval - handles casts - add stats collection for nestedloops - handle FuncExpr and MinMaxExpr - improve performances for queries having multiple quals 0.0.6: - order quals and constants by their text positions. 
- fix bug with Index-Only Scans, which where not correctly supported - make pg_config configurable from the make invocation - ensure pg_qualstats is in shared_preload_libraries 0.0.5: - fix bug with = ANY(NULL) expressions 0.0.4: - add inline documentation in the sql script - fix a bug with 32bits builds (thanks to Alain Delorme for reporting it) pg_qualstats-2.1.1/CONTRIBUTORS.md000066400000000000000000000005721467511452200165140ustar00rootroot00000000000000 * Ronan Dunklau * Vik Fearing * Thomas Reiss * Julien Rouhaud * shribe * Tomas Vondra * Stéphane Tachoires * Pavel Trukhanov * Andreas Seltenreich * Nicolas Gollet * Gürkan Gür * Devrim Gündüz * github user seqizz * github user akovac * Romain DEP * github user suprimex * github user RekGRpth * Zhihong Yu * github user romanstingler pg_qualstats-2.1.1/LICENSE000066400000000000000000000017531467511452200152440ustar00rootroot00000000000000Copyright (c) 2014-2017 Ronan Dunklau Copyright (c) 2018-2024 The Powa-Team Permission to use, copy, modify, and distribute this software and its documentation for any purpose, without fee, and without a written agreement is hereby granted, provided that the above copyright notice and this paragraph and the following two paragraphs appear in all copies. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 
pg_qualstats-2.1.1/META.json000066400000000000000000000011301467511452200156450ustar00rootroot00000000000000{ "name": "pg_qualstats", "abstract": "An extension collecting statistics about predicates", "version": "__VERSION__", "maintainer": "Ronan Dunklau ", "license": "postgresql", "release_status": "stable", "provides": { "pg_qualstats": { "abstract": "An extension collecting statistics about predicates", "file": "pg_qualstats.sql", "docfile": "doc/README.md", "version": "__VERSION__" } }, "meta-spec": { "version": "1.0.0", "url": "http://pgxn.org/meta/spec.txt" } } pg_qualstats-2.1.1/Makefile000066400000000000000000000015211467511452200156700ustar00rootroot00000000000000EXTENSION = pg_qualstats EXTVERSION = $(shell grep default_version $(EXTENSION).control | sed -e "s/default_version[[:space:]]*=[[:space:]]*'\([^']*\)'/\1/") TESTS = $(wildcard test/sql/*.sql) REGRESS = $(patsubst test/sql/%.sql,%,$(TESTS)) REGRESS_OPTS = --inputdir=test MODULES = $(patsubst %.c,%,$(wildcard *.c)) PG_CONFIG ?= pg_config all: release-zip: all git archive --format zip --prefix=pg_qualstats-$(EXTVERSION)/ --output ./pg_qualstats-$(EXTVERSION).zip HEAD unzip ./pg_qualstats-$(EXTVERSION).zip rm ./pg_qualstats-$(EXTVERSION).zip sed -i -e "s/__VERSION__/$(EXTVERSION)/g" ./pg_qualstats-$(EXTVERSION)/META.json zip -r ./pg_qualstats-$(EXTVERSION).zip ./pg_qualstats-$(EXTVERSION)/ rm ./pg_qualstats-$(EXTVERSION) -rf DATA = $(wildcard *--*.sql) PGXS := $(shell $(PG_CONFIG) --pgxs) include $(PGXS) pg_qualstats-2.1.1/README.md000077700000000000000000000000001467511452200175272doc/README.mdustar00rootroot00000000000000pg_qualstats-2.1.1/debian/000077500000000000000000000000001467511452200154535ustar00rootroot00000000000000pg_qualstats-2.1.1/debian/changelog000066400000000000000000000051301467511452200173240ustar00rootroot00000000000000pg-qualstats (2.1.0-1) unstable; urgency=medium * New upstream version. 
-- Julien Rouhaud Mon, 18 Sep 2023 16:49:06 +0800 pg-qualstats (2.0.4-2) unstable; urgency=medium * Upload for PostgreSQL 15. * debian/watch: Look at GitHub tags instead of releases. -- Christoph Berg Fri, 21 Oct 2022 10:53:59 +0200 pg-qualstats (2.0.4-1) unstable; urgency=medium * New upstream version with support for PostgreSQL 15. -- Julien Rouhaud Mon, 16 May 2022 14:44:30 +0800 pg-qualstats (2.0.3-1) unstable; urgency=medium * New upstream version with support for PostgreSQL 14. * Fix github watch file. * Add myself to Uploaders. -- Christoph Berg Fri, 08 Oct 2021 10:03:34 +0200 pg-qualstats (2.0.2-2) unstable; urgency=medium * Team upload for PostgreSQL 13. * Source format 3.0 (quilt). * Use dh --with pgxs_loop. * R³: no. * DH 13. * debian/tests: Use 'make' instead of postgresql-server-dev-all. -- Christoph Berg Mon, 19 Oct 2020 11:24:18 +0200 pg-qualstats (2.0.2-1) unstable; urgency=medium * New upstream version -- Julien Rouhaud Sat, 23 May 2020 19:02:15 +0000 pg-qualstats (2.0.1-1) unstable; urgency=medium * New upstream version -- Julien Rouhaud Sat, 25 Apr 2020 12:49:35 +0000 pg-qualstats (1.0.9-1) unstable; urgency=medium * New upstream version. -- Julien Rouhaud Thu, 05 Sep 2019 08:26:19 +0200 pg-qualstats (1.0.8-1) experimental; urgency=medium * New upstream version compatible with PG12. -- Christoph Berg Fri, 31 May 2019 14:12:34 +0200 pg-qualstats (1.0.7-1) unstable; urgency=medium [ Christoph Berg ] * Fix watch file to ignore debian/* tags. [ Julien Rouhaud ] * New upstream version -- Julien Rouhaud Thu, 15 Nov 2018 21:31:52 +0000 pg-qualstats (1.0.6-1) unstable; urgency=medium * New upstream version * Fix "broken symlink: /usr/share/doc/postgresql-11-pg- qualstats/README.md -> doc/README.md". The README.md is a symlink to doc/README.md, so just install doc/README.md. Thanks to Andreas Beckmann for the report! (Closes: #911476) -- Julien Rouhaud Sun, 21 Oct 2018 21:00:57 +0000 pg-qualstats (1.0.5-2) unstable; urgency=medium * Team upload. 
* Upload for PostgreSQL 11. -- Christoph Berg Fri, 12 Oct 2018 13:05:30 +0200 pg-qualstats (1.0.5-1) unstable; urgency=low * Initial release. -- Julien Rouhaud Sun, 22 Jul 2018 23:38:27 +0100 pg_qualstats-2.1.1/debian/control000066400000000000000000000017251467511452200170630ustar00rootroot00000000000000Source: pg-qualstats Section: database Priority: optional Maintainer: Julien Rouhaud Uploaders: Christoph Berg , Standards-Version: 4.6.2 Rules-Requires-Root: no Build-Depends: debhelper-compat (= 13), postgresql-all (>= 217~) Homepage: https://powa.readthedocs.io/ Vcs-Browser: https://github.com/powa-team/pg_qualstats Vcs-Git: https://github.com/powa-team/pg_qualstats.git Package: postgresql-16-pg-qualstats Architecture: any Depends: ${misc:Depends}, ${shlibs:Depends}, ${postgresql:Depends} Description: PostgreSQL extension to gather statistics about predicates. This extensions tracks WHERE clauses predicates and JOIN predicates. Statistics will report whether the predicate was evaluated as an index scan or not, how many time the expression appeared, how many times the operator was executed and how filtering the expression is. If pg_stat_statements is enabled, it can also track to which statements the predicate belongs. pg_qualstats-2.1.1/debian/control.in000066400000000000000000000017341467511452200174700ustar00rootroot00000000000000Source: pg-qualstats Section: database Priority: optional Maintainer: Julien Rouhaud Uploaders: Christoph Berg , Standards-Version: 4.6.2 Rules-Requires-Root: no Build-Depends: debhelper-compat (= 13), postgresql-all (>= 217~) Homepage: https://powa.readthedocs.io/ Vcs-Browser: https://github.com/powa-team/pg_qualstats Vcs-Git: https://github.com/powa-team/pg_qualstats.git Package: postgresql-PGVERSION-pg-qualstats Architecture: any Depends: ${misc:Depends}, ${shlibs:Depends}, ${postgresql:Depends} Description: PostgreSQL extension to gather statistics about predicates. 
This extensions tracks WHERE clauses predicates and JOIN predicates. Statistics will report whether the predicate was evaluated as an index scan or not, how many time the expression appeared, how many times the operator was executed and how filtering the expression is. If pg_stat_statements is enabled, it can also track to which statements the predicate belongs. pg_qualstats-2.1.1/debian/copyright000066400000000000000000000025141467511452200174100ustar00rootroot00000000000000Copyright (c) 2014-2017 Ronan Dunklau Copyright (c) 2018-2024 The Powa-Team Permission to use, copy, modify, and distribute this software and its documentation for any purpose, without fee, and without a written agreement is hereby granted, provided that the above copyright notice and this paragraph and the following two paragraphs appear in all copies. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 
Contributors to pg_qualstats: * Ronan Dunklau * Julien Rouhaud * Tomas Vondra * Adrien Nayrat * Andreas Seltenreich * Stéphane Tachoires * shribe * Pavel Trukhanov * Nicolas Gollet * Thomas Reiss * Vik Fearing * Pavel Trukhanov * Gürkan Gür * Devrim Gündüz * github user seqizz * github user akovac * Romain DEP pg_qualstats-2.1.1/debian/pgversions000066400000000000000000000000051467511452200175700ustar00rootroot000000000000009.4+ pg_qualstats-2.1.1/debian/rules000077500000000000000000000010441467511452200165320ustar00rootroot00000000000000#!/usr/bin/make -f PKGVER = $(shell dpkg-parsechangelog | awk -F '[:-]' '/^Version:/ { print substr($$2, 2) }') EXCLUDE = --exclude-vcs --exclude=debian override_dh_installdocs: dh_installdocs --all CONTRIBUTORS.md doc/README.md rm -rvf debian/*/usr/share/doc/postgresql-doc-* override_dh_pgxs_test: pg_buildext -o "shared_preload_libraries=pg_qualstats" installcheck . . postgresql-%v-pg-qualstats orig: debian/control clean cd .. && tar czf pg-qualstats_$(PKGVER).orig.tar.gz $(EXCLUDE) pg-qualstats-$(PKGVER) %: dh $@ --with pgxs_loop pg_qualstats-2.1.1/debian/source/000077500000000000000000000000001467511452200167535ustar00rootroot00000000000000pg_qualstats-2.1.1/debian/source/format000066400000000000000000000000141467511452200201610ustar00rootroot000000000000003.0 (quilt) pg_qualstats-2.1.1/debian/tests/000077500000000000000000000000001467511452200166155ustar00rootroot00000000000000pg_qualstats-2.1.1/debian/tests/control000066400000000000000000000001001467511452200202070ustar00rootroot00000000000000Depends: @, make Tests: installcheck Restrictions: allow-stderr pg_qualstats-2.1.1/debian/tests/installcheck000077500000000000000000000001301467511452200212010ustar00rootroot00000000000000#!/bin/sh set -eu pg_buildext -o "shared_preload_libraries=pg_qualstats" installcheck pg_qualstats-2.1.1/debian/watch000066400000000000000000000001151467511452200165010ustar00rootroot00000000000000version=4 https://github.com/powa-team/pg_qualstats/tags 
.*/([0-9.]*).tar.gz pg_qualstats-2.1.1/doc/000077500000000000000000000000001467511452200147765ustar00rootroot00000000000000pg_qualstats-2.1.1/doc/README.md000066400000000000000000000256111467511452200162620ustar00rootroot00000000000000pg_qualstats ============ pg_qualstats is a PostgreSQL extension keeping statistics on predicates found in ```WHERE``` statements and ```JOIN``` clauses. This is useful if you want to be able to analyze what are the most-often executed quals (predicates) on your database. The [powa](http://powa.readthedocs.io/) project makes use of this to provide advances index suggestions. It also allows you to identify correlated columns, by identifying which columns are most frequently queried together. The extension works by looking for known patterns in queries. Currently, this includes: - Binary OpExpr where at least one side is a column from a table. Whenever possible, the predicate will be swaped so that CONST OP VAR expressions are turned into VAR COMMUTED_OP CONST. AND and OR expression members are counted as separate entries. Ex: WHERE column1 = 2, WHERE column1 = column2, WHERE 3 = column3 - ScalarArrayOpExpr where the left side is a VAR, and the right side is an array constant. Those will be counted one time per element in the array. Ex: WHERE column1 IN (2, 3) will be counted as 2 occurences for the (column1, '=') operator pair - BooleanTest where the expression is a simple boolean column reference Ex: WHERE column1 IS TRUE Please not that clauses like WHERE columns1, WHERE NOT column1 won't be processed by pg_qualstats (yet) This extension also saves the first query text, as-is, for each distinct queryid executed, with a limit of **pg_qualstats.max** entries. Please not that the gathered data are not saved when the PostgreSQL server is restarted. 
Installation ------------ - Compatible with PostgreSQL 9.4 or later - Needs postgresql header files - sudo make install - Add pg_qualstats to the shared preload libraries: ``` shared_preload_libraries = 'pg_qualstats' ``` Configuration ------------- The following GUCs can be configured, in postgresql.conf: - *pg_qualstats.enabled* (boolean, default true): whether or not pg_qualstats should be enabled - *pg_qualstats.track_constants* (bolean, default true): whether or not pg_qualstats should keep track of each constant value individually. Disabling this GUC will considerably reduce the number of entries necessary to keep track of predicates. - *pg_qualstats.max*: the maximum number of predicated and query text tracked (defaults to 1000) - *pg_qualstats.resolve_oids* (boolean, default false): whether or not pg_qualstats should resolve oids at query time, or juste store the oids. Enabling this parameter makes the data analysis much more easy, since a connection to the database where the query was executed won't be necessary, but it will eat much more space (624 bytes per entry instead of 176). Additionnaly, this will require some catalog lookups, which aren't free. - *pg_qualstats.track_pg_catalog* (boolean, default false): whether or not pg_qualstats should compute predicates on object in pg_catalog schema. - *pg_qualstats.sample_rate* (double, default -1): the fraction of queries that should be sampled. For example, 0.1 means that only one out of ten queries will be sampled. The default (-1) means automatic, and results in a value of 1 / max_connections, so that statiscally, concurrency issues will be rare. Updating the extension ---------------------- Note that as all extensions configured in shared_preload_libraries, most of the changes are only applied once PostgreSQL is restarted with the new shared library version. The extension objects themselves only provides SQL wrappers to access internal data structures. 
Since version 2.0.4, an upgrade script is provided, allowing to upgade from the previous version only. If you want to upgrade the extension across multiple versions, or from a version older than 2.0.3, you will need top drop and recreate the extension to get the latest version. Usage ----- - Create the extension in any database: ``` CREATE EXTENSION pg_qualstats; ``` ### Functions The extension defines the following functions: - **pg_qualstats**: returns the counts for every qualifier, identified by the expression hash. This hash identifies each expression. - *userid*: oid of the user who executed the query. - *dbid*: oid of the database in which the query has been executed. - *lrelid*, *lattnum*: oid of the relation and attribute number of the VAR on the left hand side, if any. - *opno*: oid of the operator used in the expression - *rrelid*, *rattnum*: oid of the relation and attribute number of the VAR on the right hand side, if any. - *qualid*: normalized identifier of the parent "AND" expression, if any. This identifier is computed excluding the constants. This is useful for identifying predicates which are used together. - *uniquequalid*: unique identifier of the parent "AND" expression, if any. This identifier is computed including the constants. - *qualnodeid*: normalized identifier of this simple predicate. This identifier is computed excluding the constants. - *uniquequalnodeid*: unique identifier of this simple predicate. This identifier is computed including the constats. - *occurences*: number of time this predicate has been invoked, ie. number of related query execution. - *execution_count*: number of time this predicate has been executed, ie. number of rows it processed. - *nbfiltered*: number of tuples this predicate discarded. - *constant_position*: location of the constant in the original query string, as reported by the parser. - *queryid*: if pg_stats_statements is installed, the queryid identifying this query, otherwise NULL. 
- *constvalue*: a string representation of the right-hand side constant, if any, truncated to 80 bytes. Require to be *superuser* or member of *pg_read_all_stats* (since PostgreSQL 10), "" will be showed instead. - *eval_type*: evaluation type. 'f' for a predicate evaluated after a scan or 'i' for an index predicate. Example: ``` ro=# select * from pg_qualstats; userid │ dbid │ lrelid │ lattnum │ opno │ rrelid │ rattnum │ qualid │ uniquequalid │ qualnodeid │ uniquequalnodeid │ occurences │ execution_count │ nbfiltered │ constant_position │ queryid │ constvalue │ eval_type --------+-------+--------+---------+------+--------+---------+--------+--------------+------------+------------------+------------+-----------------+------------+-------------------+---------+----------------+----------- 10 │ 16384 │ 16385 │ 2 │ 98 │ │ 115075651 │ 1858640877 │ 1 │ 100000 │ 99999 │ 29 │ │ 'line 1'::text │ f 10 │ 16384 │ 16391 │ 2 │ 98 │ 16385 │ 2 │ │ 497379130 │ 497379130 │ 1 │ 0 │ 0 │ │ │ f ``` - **pg_qualstats_index_advisor(min_filter, min_selectivity, forbidden_am)**: Perform a global index suggestion. By default, only predicates filtering at least 1000 rows and 30% of the rows in average will be considered, but this can be passed as parameter. You can also provide an array of index access method if you want to avoid some. For instance, on PostgreSQL 9.6 and prior, `hash` indexes will be ignored as those weren't crash safe yet. Example: ``` SELECT v FROM json_array_elements( pg_qualstats_index_advisor(min_filter => 50)->'indexes') v ORDER BY v::text COLLATE "C"; v --------------------------------------------------------------- "CREATE INDEX ON public.adv USING btree (id1)" "CREATE INDEX ON public.adv USING btree (val, id1, id2, id3)" "CREATE INDEX ON public.pgqs USING btree (id)" (3 rows) SELECT v FROM json_array_elements( pg_qualstats_index_advisor(min_filter => 50)->'unoptimised') v ORDER BY v::text COLLATE "C"; v ----------------- "adv.val ~~* ?" 
(1 row) ``` - **pg_qualstats_deparse_qual**: format a stored predicate in the form `tablename.columname operatorname ?`. This is mostly for the global index advisor. - **pg_qualstats_get_idx_col**: for the given predicate, retrieve the underlying column name and all the possible operator class. This is mostly for the global index advisor. - **pg_qualstats_get_qualnode_rel**: for the given predicate, return the underlying table, fully qualified. This is mostly for the global index advisor - **pg_qualstats_example_queries**: return all the stored query texts. - **pg_qualstats_example_query**: return the stored query text for the given queryid if any, otherwise NULL. - **pg_qualstats_names**: return all the stored query texts. - **pg_qualstats_reset**: reset the internal counters and forget about every encountered qual. ### Views In addition to that, the extension defines some views on top of the pg_qualstats function: - **pg_qualstats**: filters calls to pg_qualstats() by the current database. - **pg_qualstats_pretty**: performs the appropriate joins to display a readable aggregated form for every attribute from the pg_qualstats view Example: ``` ro=# select * from pg_qualstats_pretty; left_schema | left_table | left_column | operator | right_schema | right_table | right_column | occurences | execution_count | nbfiltered -------------+------------------+-------------+--------------+--------------+-------------+--------------+------------+-----------------+------------ public | pgbench_accounts | aid | pg_catalog.= | | | | 5 | 5000000 | 4999995 public | pgbench_tellers | tid | pg_catalog.= | | | | 10 | 10000000 | 9999990 public | pgbench_branches | bid | pg_catalog.= | | | | 10 | 2000000 | 1999990 public | t1 | id | pg_catalog.= | public | t2 | id_t1 | 1 | 10000 | 9999 ``` - **pg_qualstats_all**: sums the counts for each attribute / operator pair, regardless of its position as an operand (LEFT or RIGHT), grouping together attributes used in AND clauses. 
Example: ``` ro=# select * from pg_qualstats_all; dbid | relid | userid | queryid | attnums | opno | qualid | occurences | execution_count | nbfiltered | qualnodeid -------+-------+--------+---------+---------+------+--------+------------+-----------------+------------+------------ 16384 | 16385 | 10 | | {2} | 98 | | 1 | 100000 | 99999 | 115075651 16384 | 16391 | 10 | | {2} | 98 | | 2 | 0 | 0 | 497379130 ``` - **pg_qualstats_by_query**: returns only predicates of the form VAR OPERATOR CONSTANT, aggregated by queryid. pg_qualstats-2.1.1/expected/000077500000000000000000000000001467511452200160325ustar00rootroot00000000000000pg_qualstats-2.1.1/expected/pg_qualstats.out000066400000000000000000000140071467511452200212740ustar00rootroot00000000000000CREATE SCHEMA "PGQS"; CREATE EXTENSION pg_qualstats WITH SCHEMA "PGQS"; -- Make sure that installcheck won't find previous data SELECT "PGQS".pg_qualstats_reset(); pg_qualstats_reset -------------------- (1 row) -- Make sure sure we'll see at least one qual SET pg_qualstats.sample_rate = 1; CREATE TABLE pgqs AS SELECT id, 'a'::text val FROM generate_series(1, 100) id; SELECT COUNT(*) FROM pgqs WHERE id = 1; count ------- 1 (1 row) SELECT lrelid::regclass::text, lattnum, occurences, execution_count, nbfiltered, constvalue, eval_type FROM "PGQS".pg_qualstats; lrelid | lattnum | occurences | execution_count | nbfiltered | constvalue | eval_type --------+---------+------------+-----------------+------------+------------+----------- pgqs | 1 | 1 | 100 | 99 | 1::integer | f (1 row) SELECT COUNT(*) > 0 FROM "PGQS".pg_qualstats; ?column? ---------- t (1 row) SELECT COUNT(*) > 0 FROM "PGQS".pg_qualstats(); ?column? ---------- t (1 row) SELECT COUNT(*) > 0 FROM "PGQS".pg_qualstats_example_queries(); ?column? 
---------- t (1 row) SELECT "PGQS".pg_qualstats_reset(); pg_qualstats_reset -------------------- (1 row) SELECT COUNT(*) FROM "PGQS".pg_qualstats(); count ------- 0 (1 row) -- OpExpr sanity checks -- subquery_var operator const, shouldn't be tracked SELECT * FROM (SELECT * FROM pgqs LIMIT 0) pgqs WHERE pgqs.id = 0; id | val ----+----- (0 rows) SELECT COUNT(*) FROM "PGQS".pg_qualstats(); count ------- 0 (1 row) -- const non_commutable_operator var, should be tracked, var found on RHS SELECT * FROM pgqs WHERE 'meh' ~ val; id | val ----+----- (0 rows) SELECT lrelid::regclass, lattnum, rrelid::regclass, rattnum FROM "PGQS".pg_qualstats(); lrelid | lattnum | rrelid | rattnum --------+---------+--------+--------- | | pgqs | 2 (1 row) SELECT "PGQS".pg_qualstats_reset(); pg_qualstats_reset -------------------- (1 row) -- opexpr operator var and commuted, shouldn't be tracked SELECT * FROM pgqs WHERE id % 2 = 3; id | val ----+----- (0 rows) SELECT * FROM pgqs WHERE 3 = id % 2; id | val ----+----- (0 rows) SELECT COUNT(*) FROM "PGQS".pg_qualstats(); count ------- 0 (1 row) -- same query with handled commuted qual, which should be found as identical SELECT * FROM pgqs WHERE id = 0; id | val ----+----- (0 rows) SELECT * FROM pgqs WHERE 0 = id; id | val ----+----- (0 rows) SELECT lrelid::regclass, lattnum, rrelid::regclass, rattnum, sum(occurences) FROM "PGQS".pg_qualstats() GROUP by 1, 2, 3, 4; lrelid | lattnum | rrelid | rattnum | sum --------+---------+--------+---------+----- pgqs | 1 | | | 2 (1 row) SELECT COUNT(DISTINCT qualnodeid) FROM "PGQS".pg_qualstats(); count ------- 1 (1 row) -- (unique)qualid behavior SELECT "PGQS".pg_qualstats_reset(); pg_qualstats_reset -------------------- (1 row) -- There should be one group of 2 AND-ed quals, and 1 qual alone SELECT COUNT(*) FROM pgqs WHERE (id = 1) OR (id > 10 AND id < 20); count ------- 10 (1 row) SELECT CASE WHEN qualid IS NULL THEN 'OR-ed' ELSE 'AND-ed' END kind, COUNT(*) FROM "PGQS".pg_qualstats() GROUP BY 1 ORDER BY 2 
DESC; kind | count --------+------- AND-ed | 2 OR-ed | 1 (2 rows) ---------------- -- index advisor ---------------- -- check that empty arrays are returned rather than NULL values SELECT "PGQS".pg_qualstats_reset(); pg_qualstats_reset -------------------- (1 row) SELECT * FROM "PGQS".pg_qualstats_index_advisor(50); pg_qualstats_index_advisor -------------------------------------- {"indexes" : [], "unoptimised" : []} (1 row) -- Test some naive scenario CREATE TABLE adv (id1 integer, id2 integer, id3 integer, val text); INSERT INTO adv SELECT i, i, i, 'line ' || i from generate_series(1, 1000) i; SELECT "PGQS".pg_qualstats_reset(); pg_qualstats_reset -------------------- (1 row) SELECT * FROM adv WHERE id1 < 0; id1 | id2 | id3 | val -----+-----+-----+----- (0 rows) SELECT count(*) FROM adv WHERE id1 < 500; count ------- 499 (1 row) SELECT * FROM adv WHERE val = 'meh'; id1 | id2 | id3 | val -----+-----+-----+----- (0 rows) SELECT * FROM adv WHERE id1 = 0 and val = 'meh'; id1 | id2 | id3 | val -----+-----+-----+----- (0 rows) SELECT * FROM adv WHERE id1 = 1 and val = 'meh'; id1 | id2 | id3 | val -----+-----+-----+----- (0 rows) SELECT * FROM adv WHERE id1 = 1 and id2 = 2 AND val = 'meh'; id1 | id2 | id3 | val -----+-----+-----+----- (0 rows) SELECT * FROM adv WHERE id1 = 6 and id2 = 6 AND id3 = 6 AND val = 'meh'; id1 | id2 | id3 | val -----+-----+-----+----- (0 rows) SELECT COUNT(*) FROM pgqs WHERE id = 1; count ------- 1 (1 row) -- non optimisable statements SELECT * FROM adv WHERE val ILIKE 'moh'; id1 | id2 | id3 | val -----+-----+-----+----- (0 rows) SELECT count(*) FROM adv WHERE val ILIKE 'moh'; count ------- 0 (1 row) SELECT * FROM adv WHERE val LIKE 'moh'; id1 | id2 | id3 | val -----+-----+-----+----- (0 rows) -- check the results SELECT v->'ddl' AS v FROM json_array_elements( "PGQS".pg_qualstats_index_advisor(50)->'indexes') v ORDER BY v::text COLLATE "C"; v --------------------------------------------------------------- "CREATE INDEX ON public.adv USING btree 
(id1)" "CREATE INDEX ON public.adv USING btree (val, id1, id2, id3)" "CREATE INDEX ON public.pgqs USING btree (id)" (3 rows) SELECT v->'qual' AS v FROM json_array_elements( "PGQS".pg_qualstats_index_advisor(50)->'unoptimised') v ORDER BY v::text COLLATE "C"; v ----------------- "adv.val ~~ ?" "adv.val ~~* ?" (2 rows) -- check quals on removed table DROP TABLE pgqs; SELECT v->'ddl' AS v FROM json_array_elements( "PGQS".pg_qualstats_index_advisor(50)->'indexes') v ORDER BY v::text COLLATE "C"; v --------------------------------------------------------------- "CREATE INDEX ON public.adv USING btree (id1)" "CREATE INDEX ON public.adv USING btree (val, id1, id2, id3)" (2 rows) pg_qualstats-2.1.1/pg_qualstats--2.0.4--2.1.0.sql000066400000000000000000000264401467511452200207140ustar00rootroot00000000000000-- complain if script is sourced in psql, rather than via CREATE EXTENSION \echo Use "ALTER EXTENSION pg_qualstats UPDATE" to load this file. \quit CREATE OR REPLACE FUNCTION @extschema@.pg_qualstats_index_advisor ( min_filter integer DEFAULT 1000, min_selectivity integer DEFAULT 30, forbidden_am text[] DEFAULT '{}') RETURNS json AS $_$ DECLARE v_processed bigint[] = '{}'; v_indexes json[] = '{}'; v_unoptimised json[] = '{}'; rec record; v_nb_processed integer = 1; v_ddl text; v_col text; v_qualnodeid bigint; v_quals_todo bigint[]; v_quals_done bigint[]; v_quals_col_done text[]; v_queryids bigint[] = '{}'; BEGIN -- sanity checks and default values SELECT coalesce(min_filter, 1000), coalesce(min_selectivity, 30), coalesce(forbidden_am, '{}') INTO min_filter, min_selectivity, forbidden_am; -- don't try to generate hash indexes Before pg 10, as those are only WAL -- logged since pg 11. IF pg_catalog.current_setting('server_version_num')::bigint < 100000 THEN forbidden_am := array_append(forbidden_am, 'hash'); END IF; -- first find out unoptimizable quals. 
-- We need an array of json containing the per-qual info, and a single -- array containing all the underlying qualnodeids, so we need to create -- the wanted final object manually as we can't have two different grouping -- approach. FOR rec IN WITH src AS (SELECT DISTINCT qualnodeid, (coalesce(lrelid, rrelid), coalesce(lattnum, rattnum), opno, eval_type)::@extschema@.qual AS qual, queryid FROM @extschema@.pg_qualstats() q JOIN pg_catalog.pg_database d ON q.dbid = d.oid LEFT JOIN pg_catalog.pg_operator op ON op.oid = q.opno LEFT JOIN pg_catalog.pg_amop amop ON amop.amopopr = op.oid LEFT JOIN pg_catalog.pg_am am ON am.oid = amop.amopmethod WHERE d.datname = current_database() AND eval_type = 'f' AND coalesce(lrelid, rrelid) != 0 AND amname IS NULL ) SELECT pg_catalog.json_build_object( 'qual', @extschema@.pg_qualstats_deparse_qual(qual), -- be careful to generate an empty array if no queryid availiable 'queryids', coalesce(pg_catalog.array_agg(DISTINCT queryid) FILTER (WHERE queryid IS NOT NULL), '{}') ) AS obj, array_agg(qualnodeid) AS qualnodeids FROM src GROUP BY qual LOOP v_unoptimised := array_append(v_unoptimised, rec.obj); v_processed := array_cat(v_processed, rec.qualnodeids); END LOOP; -- The index suggestion is done in multiple iteration, by scoring for each -- relation containing interesting quals a path of possibly AND-ed quals -- that contains other possibly AND-ed quals. Only the higher score path -- will be used to create an index, so we can then compute another set of -- paths ignoring the quals that are now optimized with an index. WHILE v_nb_processed > 0 LOOP v_nb_processed := 0; FOR rec IN -- first, find quals that seems worth to optimize along with the -- possible access methods, discarding any qualnode that are marked as -- already processed. Also apply access method restriction. 
WITH pgqs AS ( SELECT dbid, amname, qualid, qualnodeid, (coalesce(lrelid, rrelid), coalesce(lattnum, rattnum), opno, eval_type)::@extschema@.qual AS qual, queryid, round(avg(execution_count)) AS execution_count, sum(occurences) AS occurences, round(sum(nbfiltered)::numeric / sum(occurences)) AS avg_filter, CASE WHEN sum(execution_count) = 0 THEN 0 ELSE round(sum(nbfiltered::numeric) / sum(execution_count) * 100) END AS avg_selectivity FROM @extschema@.pg_qualstats() q JOIN pg_catalog.pg_database d ON q.dbid = d.oid JOIN pg_catalog.pg_operator op ON op.oid = q.opno JOIN pg_catalog.pg_amop amop ON amop.amopopr = op.oid JOIN pg_catalog.pg_am am ON am.oid = amop.amopmethod WHERE d.datname = current_database() AND eval_type = 'f' AND amname != ALL (forbidden_am) AND coalesce(lrelid, rrelid) != 0 AND qualnodeid != ALL(v_processed) GROUP BY dbid, amname, qualid, qualnodeid, lrelid, rrelid, lattnum, rattnum, opno, eval_type, queryid ), -- apply cardinality and selectivity restrictions filtered AS ( SELECT (qual).relid, amname, coalesce(qualid, qualnodeid) AS parent, count(*) AS weight, (array_agg(qualnodeid), array_agg(queryid) )::@extschema@.adv_quals AS quals FROM pgqs WHERE avg_filter >= min_filter AND avg_selectivity >= min_selectivity GROUP BY (qual).relid, amname, parent ), -- for each possibly AND-ed qual, build the list of included qualnodeid nodes AS ( SELECT p.relid, p.amname, p.parent, p.quals, c.quals AS children FROM filtered p LEFT JOIN filtered c ON (p.quals).qualnodeids @> (c.quals).qualnodeids AND p.amname = c.amname AND p.parent != c.parent AND (p.quals).qualnodeids != (c.quals).qualnodeids ), -- build the "paths", which is the list of AND-ed quals that entirely -- contains another possibly AND-ed quals, and give a score for each -- path. The scoring method used here is simply the number of -- columns in the quals. 
paths AS ( SELECT DISTINCT *, coalesce(pg_catalog.array_length((children).qualnodeids, 1), 0) AS weight FROM nodes UNION SELECT DISTINCT p.relid, p.amname, p.parent, p.quals, c.children, coalesce(pg_catalog.array_length((c.children).qualnodeids, 1), 0) AS weight FROM nodes p JOIN nodes c ON (p.children).qualnodeids @> (c.quals).qualnodeids AND (c.quals).qualnodeids IS NOT NULL AND (c.quals).qualnodeids != (p.quals).qualnodeids AND p.amname = c.amname ), -- compute the final paths. -- The scoring method used here is simply the sum of total -- number of columns in each possibly AND-ed quals, so that we can -- later chose to create indexes that optimize as many queries as -- possible with as few indexes as possible. -- We also compute here an access method weight, so that we can later -- choose a btree index rather than another access method if btree is -- available. computed AS ( SELECT relid, amname, parent, quals, array_agg(to_json(children) ORDER BY weight) FILTER (WHERE children IS NOT NULL) AS included, pg_catalog.array_length((quals).qualnodeids, 1) + sum(weight) AS path_weight, CASE amname WHEN 'btree' THEN 1 ELSE 2 END AS amweight FROM paths GROUP BY relid, amname, parent, quals ), -- compute a rank for each final paths, per relation. final AS ( SELECT relid, amname, parent, quals, included, path_weight, amweight, row_number() OVER ( PARTITION BY relid ORDER BY path_weight DESC, amweight) AS rownum FROM computed ) -- and finally choose the higher rank final path for each relation. 
SELECT relid, amname, parent, (quals).qualnodeids as quals, (quals).queryids as queryids, included, path_weight FROM final WHERE rownum = 1 LOOP v_nb_processed := v_nb_processed + 1; v_ddl := ''; v_quals_todo := '{}'; v_quals_done := '{}'; v_quals_col_done := '{}'; -- put columns from included quals, if any, first for order dependency DECLARE v_cur json; BEGIN IF rec.included IS NOT NULL THEN FOREACH v_cur IN ARRAY rec.included LOOP -- Direct cast from json to bigint is only possible since pg10 FOR v_qualnodeid IN SELECT pg_catalog.json_array_elements(v_cur->'qualnodeids')::text::bigint LOOP v_quals_todo := v_quals_todo || v_qualnodeid; END LOOP; END LOOP; END IF; END; -- and append qual's own columns v_quals_todo := v_quals_todo || rec.quals; -- generate the index DDL FOREACH v_qualnodeid IN ARRAY v_quals_todo LOOP -- skip quals already present in the index CONTINUE WHEN v_quals_done @> ARRAY[v_qualnodeid]; -- skip other quals for the same column v_col := @extschema@.pg_qualstats_get_idx_col(v_qualnodeid, false); CONTINUE WHEN v_quals_col_done @> ARRAY[v_col]; -- mark this qual as present in a generated index so it's ignore at -- next round of best quals to optimize v_processed := pg_catalog.array_append(v_processed, v_qualnodeid); -- mark this qual and col as present in this index v_quals_done := v_quals_done || v_qualnodeid; v_quals_col_done := v_quals_col_done || v_col; -- if underlying table has been dropped, stop here CONTINUE WHEN coalesce(v_col, '') = ''; -- append the column to the index IF v_ddl != '' THEN v_ddl := v_ddl || ', '; END IF; v_ddl := v_ddl || @extschema@.pg_qualstats_get_idx_col(v_qualnodeid, true); END LOOP; -- if underlying table has been dropped, skip this (broken) index CONTINUE WHEN coalesce(v_ddl, '') = ''; -- generate the full CREATE INDEX ddl v_ddl = pg_catalog.format('CREATE INDEX ON %s USING %I (%s)', @extschema@.pg_qualstats_get_qualnode_rel(v_qualnodeid), rec.amname, v_ddl); -- get the underlyings queryid(s) DECLARE v_queryid 
text; v_cur json; BEGIN v_queryids = rec.queryids; IF rec.included IS NOT NULL THEN FOREACH v_cur IN ARRAY rec.included LOOP -- Direct cast from json to bigint is only possible since pg10 FOR v_queryid IN SELECT pg_catalog.json_array_elements(v_cur->'queryids')::text LOOP CONTINUE WHEN v_queryid = 'null'; v_queryids := v_queryids || v_queryid::text::bigint; END LOOP; END LOOP; END IF; END; -- remove any duplicates SELECT pg_catalog.array_agg(DISTINCT v) INTO v_queryids FROM (SELECT unnest(v_queryids)) s(v); -- sanitize the queryids IF v_queryids IS NULL OR v_queryids = '{null}' THEN v_queryids = '{}'; END IF; -- and finally append the index to the list of generated indexes v_indexes := pg_catalog.array_append(v_indexes, pg_catalog.json_build_object( 'ddl', v_ddl, 'queryids', v_queryids ) ); END LOOP; END LOOP; RETURN pg_catalog.json_build_object( 'indexes', v_indexes, 'unoptimised', v_unoptimised); END; $_$ LANGUAGE plpgsql; /* end of pg_qualstats_index_advisor */ pg_qualstats-2.1.1/pg_qualstats--2.0.4.sql000066400000000000000000000565611467511452200202120ustar00rootroot00000000000000/*""" .. function:: pg_qualstats_reset() Resets statistics gathered by pg_qualstats. */ CREATE FUNCTION pg_qualstats_reset() RETURNS void AS 'MODULE_PATHNAME' LANGUAGE C; /*""" .. function pg_qualstats_example_query(bigint) Returns an example for a normalized query, given its queryid */ CREATE FUNCTION pg_qualstats_example_query(bigint) RETURNS text AS 'MODULE_PATHNAME' LANGUAGE C; /*""" .. function pg_qualstats_example_queries() Returns all the example queries with their associated queryid */ CREATE FUNCTION pg_qualstats_example_queries(OUT queryid bigint, OUT query text) RETURNS SETOF record AS 'MODULE_PATHNAME' LANGUAGE C; /*""" .. 
function:: pg_qualstats() Returns: A SETOF record containing the data gathered by pg_qualstats Attributes: userid (oid): the user who executed the query dbid (oid): the database on which the query was executed lrelid (oid): oid of the relation on the left hand side lattnum (attnum): attribute number of the column on the left hand side opno (oid): oid of the operator used in the expression rrelid (oid): oid of the relation on the right hand side rattnum (attnum): attribute number of the column on the right hand side qualid(bigint): hash of the parent ``AND`` expression, if any. This is useful for identifying predicates which are used together. uniquequalid(bigint): hash of the parent ``AND`` expression, if any, including the constant values. qualnodeid(bigint): the predicate hash. uniquequalnodeid(bigint): the predicate hash. Everything (down to constants) is used to compute this hash occurences (bigint): the number of times this predicate has been seen execution_count (bigint): the total number of execution of this predicate. 
nbfiltered (bigint): the number of lines filtered by this predicate min_err_estimate_ratio(double precision): the minimum selectivity estimation error ratio for this predicate max_err_estimate_ratio(double precision): the maximum selectivity estimation error ratio for this predicate mean_err_estimate_ratio(double precision): the mean selectivity estimation error ratio for this predicate stddev_err_estimate_ratio(double precision): the standard deviation for selectivity estimation error ratio for this predicate min_err_estimate_num(bigint): the minimum number of line for selectivity estimation error for this predicate max_err_estimate_num(bigint): the maximum number of line for selectivity estimation error for this predicate mean_err_estimate_num(double precision): the mean number of line for selectivity estimation error for this predicate stddev_err_estimate_num(double precision): the standard deviation for number of line for selectivity estimation error for this predicate constant_position (int): the position of the constant in the original query, as filled by the lexer. queryid (bigint): the queryid identifying this query, as generated by pg_stat_statements constvalue (varchar): a string representation of the right-hand side constant, if any, truncated to 80 bytes. eval_type (char): the evaluation type. Possible values are ``f`` for execution as a filter (ie, after a Scan) or ``i`` if it was evaluated as an index predicate. If the qual is evaluated as an index predicate, then the nbfiltered value will most likely be 0, except if there was any rechecked conditions. Example: .. 
code-block:: sql powa=# select * from powa_statements where queryid != 2; powa=# select * from pg_qualstats(); -[ RECORD 1 ]-----+----------- userid | 10 dbid | 32799 lrelid | 189341 lattnum | 2 opno | 417 rrelid | rattnum | qualid | uniquequalid | qualnodeid | 1391544855 uniquequalnodeid | 551979005 occurences | 1 execution_count | 31 nbfiltered | 0 min_err_estimate_ratio | 32.741935483871 max_err_estimate_ratio | 32.741935483871 mean_err_estimate_ratio | 32.741935483871 stddev_err_estimate_ratio | 0 min_err_estimate_num | 984 max_err_estimate_num | 984 mean_err_estimate_num | 984 stddev_err_estimate_num | 0 constant_position | 47 queryid | -6668685762776610659 constvalue | 2::integer eval_type | f */ CREATE FUNCTION pg_qualstats( OUT userid oid, OUT dbid oid, OUT lrelid oid, OUT lattnum smallint, OUT opno oid, OUT rrelid oid, OUT rattnum smallint, OUT qualid bigint, OUT uniquequalid bigint, OUT qualnodeid bigint, OUT uniquequalnodeid bigint, OUT occurences bigint, OUT execution_count bigint, OUT nbfiltered bigint, OUT min_err_estimate_ratio double precision, OUT max_err_estimate_ratio double precision, OUT mean_err_estimate_ratio double precision, OUT stddev_err_estimate_ratio double precision, OUT min_err_estimate_num bigint, OUT max_err_estimate_num bigint, OUT mean_err_estimate_num double precision, OUT stddev_err_estimate_num double precision, OUT constant_position int, OUT queryid bigint, OUT constvalue varchar, OUT eval_type "char" ) RETURNS SETOF record AS 'MODULE_PATHNAME', 'pg_qualstats_2_0' LANGUAGE C STRICT VOLATILE; /*""" .. function:: pg_qualstats_names() This function is the same as pg_qualstats, but with additional columns corresponding to the resolved names, if ``pg_qualstats.resolve_oids`` is set to ``true``. Returns: The same set of columns than :func:`pg_qualstats()`, plus the following ones: rolname (text): the name of the role executing the query. Corresponds to userid. dbname (text): the name of the database on which the query was executed. 
Corresponds to dbid. lrelname (text): the name of the relation on the left-hand side of the qual. Corresponds to lrelid. lattname (text): the name of the attribute (column) on the left-hand side of the qual. Corresponds to rrelid. opname (text): the name of the operator. Corresponds to opno. */ CREATE FUNCTION pg_qualstats_names( OUT userid oid, OUT dbid oid, OUT lrelid oid, OUT lattnum smallint, OUT opno oid, OUT rrelid oid, OUT rattnum smallint, OUT qualid bigint, OUT uniquequalid bigint, OUT qualnodeid bigint, OUT uniquequalnodeid bigint, OUT occurences bigint, OUT execution_count bigint, OUT nbfiltered bigint, OUT min_err_estimate_ratio double precision, OUT max_err_estimate_ratio double precision, OUT mean_err_estimate_ratio double precision, OUT stddev_err_estimate_ratio double precision, OUT min_err_estimate_num bigint, OUT max_err_estimate_num bigint, OUT mean_err_estimate_num double precision, OUT stddev_err_estimate_num double precision, OUT constant_position int, OUT queryid bigint, OUT constvalue varchar, OUT eval_type "char", OUT rolname text, OUT dbname text, OUT lrelname text, OUT lattname text, OUT opname text, OUT rrelname text, OUT rattname text ) RETURNS SETOF record AS 'MODULE_PATHNAME', 'pg_qualstats_names_2_0' LANGUAGE C STRICT VOLATILE; -- Register a view on the function for ease of use. /*""" .. view:: pg_qualstats This view is just a simple wrapper on the :func:`pg_qualstats()` function, filtering on the current database for convenience. */ CREATE VIEW pg_qualstats AS SELECT qs.* FROM pg_qualstats() qs INNER JOIN pg_database on qs.dbid = pg_database.oid WHERE pg_database.datname = current_database(); GRANT SELECT ON pg_qualstats TO PUBLIC; -- Don't want this to be available to non-superusers. REVOKE ALL ON FUNCTION pg_qualstats_reset() FROM PUBLIC; /*""" .. view:: pg_qualstats_pretty This view resolves oid "on the fly", for the current database. Returns: left_schema (name): the name of the left-hand side relation's schema. 
left_table (name): the name of the left-hand side relation. left_column (name): the name of the left-hand side attribute. operator (name): the name of the operator. right_schema (name): the name of the right-hand side relation's schema. right_table (name): the name of the right-hand side relation. right_column (name): the name of the operator. execution_count (bigint): the total number of time this qual was executed. nbfiltered (bigint): the total number of tuples filtered by this qual. */ CREATE VIEW pg_qualstats_pretty AS select nl.nspname as left_schema, al.attrelid::regclass as left_table, al.attname as left_column, opno::regoper::text as operator, nr.nspname as right_schema, ar.attrelid::regclass as right_table, ar.attname as right_column, sum(occurences) as occurences, sum(execution_count) as execution_count, sum(nbfiltered) as nbfiltered from pg_qualstats qs left join (pg_class cl inner join pg_namespace nl on nl.oid = cl.relnamespace) on cl.oid = qs.lrelid left join (pg_class cr inner join pg_namespace nr on nr.oid = cr.relnamespace) on cr.oid = qs.rrelid left join pg_attribute al on al.attrelid = qs.lrelid and al.attnum = qs.lattnum left join pg_attribute ar on ar.attrelid = qs.rrelid and ar.attnum = qs.rattnum group by al.attrelid, al.attname, ar.attrelid, ar.attname, opno, nl.nspname, nr.nspname ; CREATE OR REPLACE VIEW pg_qualstats_all AS SELECT dbid, relid, userid, queryid, array_agg(distinct attnum) as attnums, opno, max(qualid) as qualid, sum(occurences) as occurences, sum(execution_count) as execution_count, sum(nbfiltered) as nbfiltered, coalesce(qualid, qualnodeid) as qualnodeid FROM ( SELECT qs.dbid, CASE WHEN lrelid IS NOT NULL THEN lrelid WHEN rrelid IS NOT NULL THEN rrelid END as relid, qs.userid as userid, CASE WHEN lrelid IS NOT NULL THEN lattnum WHEN rrelid IS NOT NULL THEN rattnum END as attnum, qs.opno as opno, qs.qualid as qualid, qs.qualnodeid as qualnodeid, qs.occurences as occurences, qs.execution_count as execution_count, 
qs.nbfiltered as nbfiltered, qs.queryid FROM pg_qualstats() qs WHERE lrelid IS NOT NULL or rrelid IS NOT NULL ) t GROUP BY dbid, relid, userid, queryid, opno, coalesce(qualid, qualnodeid) ; /*""" .. type:: qual Attributes: relid (oid): the relation oid attnum (integer): the attribute number opno (oid): the operator oid eval_type (char): the evaluation type. See :func:`pg_qualstats()` for an explanation of the eval_type. */ CREATE TYPE qual AS ( relid oid, attnum integer, opno oid, eval_type "char" ); /*""" .. type:: qualname Pendant of :type:`qual`, but with names instead of oids Attributes: relname (text): the relation oid attname (text): the attribute number opname (text): the operator name eval_type (char): the evaluation type. See :func:`pg_qualstats()` for an explanation of the eval_type. */ CREATE TYPE qualname AS ( relname text, attnname text, opname text, eval_type "char" ); CREATE OR REPLACE VIEW pg_qualstats_by_query AS SELECT coalesce(uniquequalid, uniquequalnodeid) as uniquequalnodeid, dbid, userid, coalesce(qualid, qualnodeid) as qualnodeid, occurences, execution_count, nbfiltered, queryid, array_agg(constvalue order by constant_position) as constvalues, array_agg(ROW(relid, attnum, opno, eval_type)::qual) as quals FROM ( SELECT qs.dbid, CASE WHEN lrelid IS NOT NULL THEN lrelid WHEN rrelid IS NOT NULL THEN rrelid END as relid, qs.userid as userid, CASE WHEN lrelid IS NOT NULL THEN lattnum WHEN rrelid IS NOT NULL THEN rattnum END as attnum, qs.opno as opno, qs.qualid as qualid, qs.uniquequalid as uniquequalid, qs.qualnodeid as qualnodeid, qs.uniquequalnodeid as uniquequalnodeid, qs.occurences as occurences, qs.execution_count as execution_count, qs.queryid as queryid, qs.constvalue as constvalue, qs.nbfiltered as nbfiltered, qs.eval_type, qs.constant_position FROM pg_qualstats() qs WHERE (qs.lrelid IS NULL) != (qs.rrelid IS NULL) ) i GROUP BY coalesce(uniquequalid, uniquequalnodeid), coalesce(qualid, qualnodeid), dbid, userid, occurences, 
execution_count, nbfiltered, queryid ; CREATE OR REPLACE FUNCTION pg_qualstats_deparse_qual(qual qual) RETURNS TEXT AS $_$ SELECT pg_catalog.format('%I.%I %s ?', c.oid::regclass, a.attname, o.oprname) FROM pg_catalog.pg_class c JOIN pg_catalog.pg_attribute a ON a.attrelid = c.oid JOIN pg_catalog.pg_operator o ON o.oid = qual.opno WHERE c.oid = qual.relid AND a.attnum = qual.attnum $_$ LANGUAGE sql; CREATE OR REPLACE FUNCTION pg_qualstats_get_qualnode_rel(bigint) RETURNS TEXT AS $_$ SELECT pg_catalog.quote_ident(n.nspname) || '.' || pg_catalog.quote_ident(c.relname) FROM pg_qualstats() q JOIN pg_catalog.pg_class c ON coalesce(q.lrelid, q.rrelid) = c.oid JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace WHERE q.qualnodeid = $1 $_$ LANGUAGE sql; CREATE OR REPLACE FUNCTION pg_qualstats_get_idx_col(bigint, include_nondefault_opclass boolean = true) RETURNS TEXT AS $_$ SELECT pg_catalog.quote_ident(a.attname) || CASE WHEN include_nondefault_opclass THEN CASE WHEN opc.opcdefault THEN '' ELSE ' ' || pg_catalog.quote_ident(opcname) END ELSE '' END FROM pg_qualstats() q JOIN pg_catalog.pg_class c ON coalesce(q.lrelid, q.rrelid) = c.oid JOIN pg_catalog.pg_attribute a ON a.attrelid = c.oid AND a.attnum = coalesce(q.lattnum, q.rattnum) JOIN pg_catalog.pg_operator op ON op.oid = q.opno JOIN pg_catalog.pg_amop amop ON amop.amopopr = op.oid JOIN pg_catalog.pg_am am ON am.oid = amop.amopmethod JOIN pg_catalog.pg_opfamily f ON f.opfmethod = am.oid AND amop.amopfamily = f.oid JOIN pg_catalog.pg_opclass opc ON opc.opcfamily = f.oid WHERE q.qualnodeid = $1 ORDER BY CASE opcdefault WHEN true THEN 0 ELSE 1 END; $_$ LANGUAGE sql; CREATE OR REPLACE FUNCTION pg_qualstats_index_advisor ( min_filter integer DEFAULT 1000, min_selectivity integer DEFAULT 30, forbidden_am text[] DEFAULT '{}') RETURNS json AS $_$ DECLARE v_res json; v_processed bigint[] = '{}'; v_indexes text[] = '{}'; v_unoptimised text[] = '{}'; rec record; v_nb_processed integer = 1; v_ddl text; v_col text; v_cur json; 
v_qualnodeid bigint; v_quals_todo bigint[]; v_quals_done bigint[]; v_quals_col_done text[]; BEGIN -- sanity checks and default values SELECT coalesce(min_filter, 1000), coalesce(min_selectivity, 30), coalesce(forbidden_am, '{}') INTO min_filter, min_selectivity, forbidden_am; -- don't try to generate hash indexes Before pg 10, as those are only WAL -- logged since pg 11. IF pg_catalog.current_setting('server_version_num')::bigint < 100000 THEN forbidden_am := array_append(forbidden_am, 'hash'); END IF; -- first find out unoptimizable quals FOR rec IN SELECT DISTINCT qualnodeid, (coalesce(lrelid, rrelid), coalesce(lattnum, rattnum), opno, eval_type)::qual AS qual FROM pg_qualstats() q JOIN pg_catalog.pg_database d ON q.dbid = d.oid LEFT JOIN pg_catalog.pg_operator op ON op.oid = q.opno LEFT JOIN pg_catalog.pg_amop amop ON amop.amopopr = op.oid LEFT JOIN pg_catalog.pg_am am ON am.oid = amop.amopmethod WHERE d.datname = current_database() AND eval_type = 'f' AND coalesce(lrelid, rrelid) != 0 AND amname IS NULL LOOP v_unoptimised := pg_catalog.array_append(v_unoptimised, pg_qualstats_deparse_qual(rec.qual)); v_processed := pg_catalog.array_append(v_processed, rec.qualnodeid); END LOOP; -- The index suggestion is done in multiple iteration, by scoring for each -- relation containing interesting quals a path of possibly AND-ed quals -- that contains other possibly AND-ed quals. Only the higher score path -- will be used to create an index, so we can then compute another set of -- paths ignoring the quals that are now optimized with an index. WHILE v_nb_processed > 0 LOOP v_nb_processed := 0; FOR rec IN -- first, find quals that seems worth to optimize along with the -- possible access methods, discarding any qualnode that are marked as -- already processed. Also apply access method restriction. 
WITH pgqs AS ( SELECT dbid, amname, qualid, qualnodeid, (coalesce(lrelid, rrelid), coalesce(lattnum, rattnum), opno, eval_type)::qual AS qual, round(avg(execution_count)) AS execution_count, sum(occurences) AS occurences, round(sum(nbfiltered)::numeric / sum(occurences)) AS avg_filter, CASE WHEN sum(execution_count) = 0 THEN 0 ELSE round(sum(nbfiltered::numeric) / sum(execution_count) * 100) END AS avg_selectivity FROM pg_qualstats() q JOIN pg_catalog.pg_database d ON q.dbid = d.oid JOIN pg_catalog.pg_operator op ON op.oid = q.opno JOIN pg_catalog.pg_amop amop ON amop.amopopr = op.oid JOIN pg_catalog.pg_am am ON am.oid = amop.amopmethod WHERE d.datname = current_database() AND eval_type = 'f' AND amname != ALL (forbidden_am) AND coalesce(lrelid, rrelid) != 0 AND qualnodeid != ALL(v_processed) GROUP BY dbid, amname, qualid, qualnodeid, lrelid, rrelid, lattnum, rattnum, opno, eval_type ), -- apply cardinality and selectivity restrictions filtered AS ( SELECT (qual).relid, amname, coalesce(qualid, qualnodeid) AS parent, count(*) AS weight, array_agg(qualnodeid) AS quals FROM pgqs WHERE avg_filter >= min_filter AND avg_selectivity >= min_selectivity GROUP BY (qual).relid, amname, parent ), -- for each possibly AND-ed qual, build the list of included qualnodeid nodes AS ( SELECT p.relid, p.amname, p.parent, p.quals, c.quals AS children FROM filtered p LEFT JOIN filtered c ON p.quals @> c.quals AND p.amname = c.amname AND p.parent != c.parent AND p.quals != c.quals ), -- build the "paths", which is the list of AND-ed quals that entirely -- contains another possibly AND-ed quals, and give a score for each -- path. The scoring method used here is simply the number of -- columns in the quals. 
paths AS ( SELECT DISTINCT *, coalesce(pg_catalog.array_length(children, 1), 0) AS weight FROM nodes UNION SELECT DISTINCT p.relid, p.amname, p.parent, p.quals, c.children, coalesce(pg_catalog.array_length(c.children, 1), 0) AS weight FROM nodes p JOIN nodes c ON p.children @> c.quals AND c.quals IS NOT NULL AND c.quals != p.quals AND p.amname = c.amname ), -- compute the final paths. -- The scoring method used here is simply the sum of total -- number of columns in each possibly AND-ed quals, so that we can -- later chose to create indexes that optimize as many queries as -- possible with as few indexes as possible. -- We also compute here an access method weight, so that we can later -- choose a btree index rather than another access method if btree is -- available. computed AS ( SELECT relid, amname, parent, quals, array_agg(to_json(children) ORDER BY weight) FILTER (WHERE children IS NOT NULL) AS included, pg_catalog.array_length(quals, 1) + sum(weight) AS path_weight, CASE amname WHEN 'btree' THEN 1 ELSE 2 END AS amweight FROM paths GROUP BY relid, amname, parent, quals ), -- compute a rank for each final paths, per relation. final AS ( SELECT relid, amname, parent, quals, included, path_weight, amweight, row_number() OVER ( PARTITION BY relid ORDER BY path_weight DESC, amweight) AS rownum FROM computed ) -- and finally choose the higher rank final path for each relation. 
SELECT relid, amname, parent, quals, included, path_weight FROM final WHERE rownum = 1 LOOP v_nb_processed := v_nb_processed + 1; v_ddl := ''; v_quals_todo := '{}'; v_quals_done := '{}'; v_quals_col_done := '{}'; -- put columns from included quals, if any, first for order dependency IF rec.included IS NOT NULL THEN FOREACH v_cur IN ARRAY rec.included LOOP -- Direct cast from json to bigint is only possible since pg10 FOR v_qualnodeid IN SELECT pg_catalog.json_array_elements(v_cur)::text::bigint LOOP v_quals_todo := v_quals_todo || v_qualnodeid; END LOOP; END LOOP; END IF; -- and append qual's own columns v_quals_todo := v_quals_todo || rec.quals; -- generate the index DDL FOREACH v_qualnodeid IN ARRAY v_quals_todo LOOP -- skip quals already present in the index CONTINUE WHEN v_quals_done @> ARRAY[v_qualnodeid]; -- skip other quals for the same column v_col := pg_qualstats_get_idx_col(v_qualnodeid, false); CONTINUE WHEN v_quals_col_done @> ARRAY[v_col]; -- mark this qual as present in a generated index so it's ignore at -- next round of best quals to optimize v_processed := pg_catalog.array_append(v_processed, v_qualnodeid); -- mark this qual and col as present in this index v_quals_done := v_quals_done || v_qualnodeid; v_quals_col_done := v_quals_col_done || v_col; -- if underlying table has been dropped, stop here CONTINUE WHEN coalesce(v_col, '') = ''; -- append the column to the index IF v_ddl != '' THEN v_ddl := v_ddl || ', '; END IF; v_ddl := v_ddl || pg_qualstats_get_idx_col(v_qualnodeid, true); END LOOP; -- if underlying table has been dropped, skip this (broken) index CONTINUE WHEN coalesce(v_ddl, '') = ''; -- generate the full CREATE INDEX ddl v_ddl = pg_catalog.format('CREATE INDEX ON %s USING %I (%s)', pg_qualstats_get_qualnode_rel(v_qualnodeid), rec.amname, v_ddl); -- and append it to the list of generated indexes v_indexes := array_append(v_indexes, v_ddl); END LOOP; END LOOP; v_res := pg_catalog.json_build_object('indexes', v_indexes, 'unoptimised', 
v_unoptimised); RETURN v_res; END; $_$ LANGUAGE plpgsql; pg_qualstats-2.1.1/pg_qualstats--2.1.0--2.1.1.sql000066400000000000000000000266771467511452200207260ustar00rootroot00000000000000-- complain if script is sourced in psql, rather than via CREATE EXTENSION \echo Use "ALTER EXTENSION pg_qualstats UPDATE" to load this file. \quit CREATE OR REPLACE FUNCTION @extschema@.pg_qualstats_index_advisor ( min_filter integer DEFAULT 1000, min_selectivity integer DEFAULT 30, forbidden_am text[] DEFAULT '{}') RETURNS json AS $_$ DECLARE v_processed bigint[] = '{}'; v_indexes json[] = '{}'; v_unoptimised json[] = '{}'; rec record; v_nb_processed integer = 1; v_ddl text; v_col text; v_qualnodeid bigint; v_quals_todo bigint[]; v_quals_done bigint[]; v_quals_col_done text[]; v_queryids bigint[] = '{}'; BEGIN -- sanity checks and default values SELECT coalesce(min_filter, 1000), coalesce(min_selectivity, 30), coalesce(forbidden_am, '{}') INTO min_filter, min_selectivity, forbidden_am; -- don't try to generate hash indexes Before pg 10, as those are only WAL -- logged since pg 11. IF pg_catalog.current_setting('server_version_num')::bigint < 100000 THEN forbidden_am := array_append(forbidden_am, 'hash'); END IF; -- first find out unoptimizable quals. -- We need an array of json containing the per-qual info, and a single -- array containing all the underlying qualnodeids, so we need to create -- the wanted final object manually as we can't have two different grouping -- approach. 
FOR rec IN WITH src AS (SELECT DISTINCT qualnodeid, (coalesce(lrelid, rrelid), coalesce(lattnum, rattnum), opno, eval_type)::@extschema@.qual AS qual, queryid FROM @extschema@.pg_qualstats() q JOIN pg_catalog.pg_database d ON q.dbid = d.oid LEFT JOIN pg_catalog.pg_operator op ON op.oid = q.opno LEFT JOIN pg_catalog.pg_amop amop ON amop.amopopr = op.oid LEFT JOIN pg_catalog.pg_am am ON am.oid = amop.amopmethod WHERE d.datname = current_database() AND eval_type = 'f' AND coalesce(lrelid, rrelid) != 0 AND amname IS NULL ) SELECT pg_catalog.json_build_object( 'qual', @extschema@.pg_qualstats_deparse_qual(qual), -- be careful to generate an empty array if no queryid availiable 'queryids', coalesce(pg_catalog.array_agg(DISTINCT queryid) FILTER (WHERE queryid IS NOT NULL), '{}') ) AS obj, array_agg(qualnodeid) AS qualnodeids FROM src GROUP BY qual LOOP v_unoptimised := array_append(v_unoptimised, rec.obj); v_processed := array_cat(v_processed, rec.qualnodeids); END LOOP; -- The index suggestion is done in multiple iteration, by scoring for each -- relation containing interesting quals a path of possibly AND-ed quals -- that contains other possibly AND-ed quals. Only the higher score path -- will be used to create an index, so we can then compute another set of -- paths ignoring the quals that are now optimized with an index. WHILE v_nb_processed > 0 LOOP v_nb_processed := 0; FOR rec IN -- first, find quals that seems worth to optimize along with the -- possible access methods, discarding any qualnode that are marked as -- already processed. Also apply access method restriction. 
WITH pgqs AS ( SELECT dbid, amname, qualid, qualnodeid, (coalesce(lrelid, rrelid), coalesce(lattnum, rattnum), opno, eval_type)::@extschema@.qual AS qual, queryid, round(avg(execution_count)) AS execution_count, sum(occurences) AS occurences, round(sum(nbfiltered)::numeric / sum(occurences)) AS avg_filter, CASE WHEN sum(execution_count) = 0 THEN 0 ELSE round(sum(nbfiltered::numeric) / sum(execution_count) * 100) END AS avg_selectivity FROM @extschema@.pg_qualstats() q JOIN pg_catalog.pg_database d ON q.dbid = d.oid JOIN pg_catalog.pg_operator op ON op.oid = q.opno JOIN pg_catalog.pg_amop amop ON amop.amopopr = op.oid JOIN pg_catalog.pg_am am ON am.oid = amop.amopmethod WHERE d.datname = current_database() AND eval_type = 'f' AND amname != ALL (forbidden_am) AND coalesce(lrelid, rrelid) != 0 AND qualnodeid != ALL(v_processed) GROUP BY dbid, amname, qualid, qualnodeid, lrelid, rrelid, lattnum, rattnum, opno, eval_type, queryid ), -- apply cardinality and selectivity restrictions filtered AS ( SELECT (qual).relid, amname, coalesce(qualid, qualnodeid) AS parent, count(*) AS weight, (array_agg(DISTINCT qualnodeid), array_agg(queryid) )::@extschema@.adv_quals AS quals FROM pgqs WHERE avg_filter >= min_filter AND avg_selectivity >= min_selectivity GROUP BY (qual).relid, amname, parent ), -- for each possibly AND-ed qual, build the list of included qualnodeid nodes AS ( SELECT p.relid, p.amname, p.parent, p.quals, c.quals AS children FROM filtered p LEFT JOIN filtered c ON (p.quals).qualnodeids @> (c.quals).qualnodeids AND p.amname = c.amname AND p.parent != c.parent AND (p.quals).qualnodeids != (c.quals).qualnodeids ), -- build the "paths", which is the list of AND-ed quals that entirely -- contains another possibly AND-ed quals, and give a score for each -- path. The scoring method used here is simply the number of -- columns in the quals. 
paths AS ( SELECT DISTINCT *, coalesce(pg_catalog.array_length((children).qualnodeids, 1), 0) AS weight FROM nodes UNION SELECT DISTINCT p.relid, p.amname, p.parent, p.quals, c.children, coalesce(pg_catalog.array_length((c.children).qualnodeids, 1), 0) AS weight FROM nodes p JOIN nodes c ON (p.children).qualnodeids @> (c.quals).qualnodeids AND (c.quals).qualnodeids IS NOT NULL AND (c.quals).qualnodeids != (p.quals).qualnodeids AND p.amname = c.amname ), -- compute the final paths. -- The scoring method used here is simply the sum of total -- number of columns in each possibly AND-ed quals, so that we can -- later chose to create indexes that optimize as many queries as -- possible with as few indexes as possible. -- We also compute here an access method weight, so that we can later -- choose a btree index rather than another access method if btree is -- available. computed AS ( SELECT relid, amname, parent, quals, array_agg(to_json(children) ORDER BY weight) FILTER (WHERE children IS NOT NULL) AS included, pg_catalog.array_length((quals).qualnodeids, 1) + sum(weight) AS path_weight, CASE amname WHEN 'btree' THEN 1 ELSE 2 END AS amweight FROM paths GROUP BY relid, amname, parent, quals ), -- compute a rank for each final paths, per relation. final AS ( SELECT relid, amname, parent, quals, included, path_weight, amweight, row_number() OVER ( PARTITION BY relid ORDER BY path_weight DESC, amweight) AS rownum FROM computed ) -- and finally choose the higher rank final path for each relation. 
SELECT relid, amname, parent, (quals).qualnodeids as quals, (quals).queryids as queryids, included, path_weight FROM final WHERE rownum = 1 LOOP v_nb_processed := v_nb_processed + 1; v_ddl := ''; v_quals_todo := '{}'; v_quals_done := '{}'; v_quals_col_done := '{}'; -- put columns from included quals, if any, first for order dependency DECLARE v_cur json; BEGIN IF rec.included IS NOT NULL THEN FOR v_cur IN SELECT v->'qualnodeids' FROM (SELECT * FROM unnest(rec.included)) AS r(v) ORDER BY pg_catalog.json_array_length(v->'qualnodeids') ASC LOOP -- Direct cast from json to bigint is only possible since pg10 FOR v_qualnodeid IN SELECT pg_catalog.json_array_elements(v_cur)::text::bigint LOOP v_quals_todo := v_quals_todo || v_qualnodeid; END LOOP; END LOOP; END IF; END; -- and append qual's own columns v_quals_todo := v_quals_todo || rec.quals; -- generate the index DDL FOREACH v_qualnodeid IN ARRAY v_quals_todo LOOP -- skip quals already present in the index CONTINUE WHEN v_quals_done @> ARRAY[v_qualnodeid]; -- skip other quals for the same column v_col := @extschema@.pg_qualstats_get_idx_col(v_qualnodeid, false); CONTINUE WHEN v_quals_col_done @> ARRAY[v_col]; -- mark this qual as present in a generated index so it's ignore at -- next round of best quals to optimize v_processed := pg_catalog.array_append(v_processed, v_qualnodeid); -- mark this qual and col as present in this index v_quals_done := v_quals_done || v_qualnodeid; v_quals_col_done := v_quals_col_done || v_col; -- if underlying table has been dropped, stop here CONTINUE WHEN coalesce(v_col, '') = ''; -- append the column to the index IF v_ddl != '' THEN v_ddl := v_ddl || ', '; END IF; v_ddl := v_ddl || @extschema@.pg_qualstats_get_idx_col(v_qualnodeid, true); END LOOP; -- if underlying table has been dropped, skip this (broken) index CONTINUE WHEN coalesce(v_ddl, '') = ''; -- generate the full CREATE INDEX ddl v_ddl = pg_catalog.format('CREATE INDEX ON %s USING %I (%s)', 
@extschema@.pg_qualstats_get_qualnode_rel(v_qualnodeid), rec.amname, v_ddl); -- get the underlyings queryid(s) DECLARE v_queryid text; v_cur json; BEGIN v_queryids = rec.queryids; IF rec.included IS NOT NULL THEN FOREACH v_cur IN ARRAY rec.included LOOP -- Direct cast from json to bigint is only possible since pg10 FOR v_queryid IN SELECT pg_catalog.json_array_elements(v_cur->'queryids')::text LOOP CONTINUE WHEN v_queryid = 'null'; v_queryids := v_queryids || v_queryid::text::bigint; END LOOP; END LOOP; END IF; END; -- remove any duplicates SELECT pg_catalog.array_agg(DISTINCT v) INTO v_queryids FROM (SELECT unnest(v_queryids)) s(v); -- sanitize the queryids IF v_queryids IS NULL OR v_queryids = '{null}' THEN v_queryids = '{}'; END IF; -- and finally append the index to the list of generated indexes v_indexes := pg_catalog.array_append(v_indexes, pg_catalog.json_build_object( 'ddl', v_ddl, 'queryids', v_queryids ) ); END LOOP; END LOOP; RETURN pg_catalog.json_build_object( 'indexes', v_indexes, 'unoptimised', v_unoptimised); END; $_$ LANGUAGE plpgsql; /* end of pg_qualstats_index_advisor */ pg_qualstats-2.1.1/pg_qualstats--2.1.0.sql000066400000000000000000000641721467511452200202040ustar00rootroot00000000000000/*""" .. function:: pg_qualstats_reset() Resets statistics gathered by pg_qualstats. */ CREATE FUNCTION @extschema@.pg_qualstats_reset() RETURNS void AS 'MODULE_PATHNAME' LANGUAGE C; /*""" .. function pg_qualstats_example_query(bigint) Returns an example for a normalized query, given its queryid */ CREATE FUNCTION @extschema@.pg_qualstats_example_query(bigint) RETURNS text AS 'MODULE_PATHNAME' LANGUAGE C; /*""" .. function pg_qualstats_example_queries() Returns all the example queries with their associated queryid */ CREATE FUNCTION @extschema@.pg_qualstats_example_queries(OUT queryid bigint, OUT query text) RETURNS SETOF record AS 'MODULE_PATHNAME' LANGUAGE C; /*""" .. 
function:: pg_qualstats() Returns: A SETOF record containing the data gathered by pg_qualstats Attributes: userid (oid): the user who executed the query dbid (oid): the database on which the query was executed lrelid (oid): oid of the relation on the left hand side lattnum (attnum): attribute number of the column on the left hand side opno (oid): oid of the operator used in the expression rrelid (oid): oid of the relation on the right hand side rattnum (attnum): attribute number of the column on the right hand side qualid(bigint): hash of the parent ``AND`` expression, if any. This is useful for identifying predicates which are used together. uniquequalid(bigint): hash of the parent ``AND`` expression, if any, including the constant values. qualnodeid(bigint): the predicate hash. uniquequalnodeid(bigint): the predicate hash. Everything (down to constants) is used to compute this hash occurences (bigint): the number of times this predicate has been seen execution_count (bigint): the total number of execution of this predicate. 
nbfiltered (bigint): the number of lines filtered by this predicate min_err_estimate_ratio(double precision): the minimum selectivity estimation error ratio for this predicate max_err_estimate_ratio(double precision): the maximum selectivity estimation error ratio for this predicate mean_err_estimate_ratio(double precision): the mean selectivity estimation error ratio for this predicate stddev_err_estimate_ratio(double precision): the standard deviation for selectivity estimation error ratio for this predicate min_err_estimate_num(bigint): the minimum number of line for selectivity estimation error for this predicate max_err_estimate_num(bigint): the maximum number of line for selectivity estimation error for this predicate mean_err_estimate_num(double precision): the mean number of line for selectivity estimation error for this predicate stddev_err_estimate_num(double precision): the standard deviation for number of line for selectivity estimation error for this predicate constant_position (int): the position of the constant in the original query, as filled by the lexer. queryid (bigint): the queryid identifying this query, as generated by pg_stat_statements constvalue (varchar): a string representation of the right-hand side constant, if any, truncated to 80 bytes. eval_type (char): the evaluation type. Possible values are ``f`` for execution as a filter (ie, after a Scan) or ``i`` if it was evaluated as an index predicate. If the qual is evaluated as an index predicate, then the nbfiltered value will most likely be 0, except if there was any rechecked conditions. Example: .. 
code-block:: sql powa=# select * from powa_statements where queryid != 2; powa=# select * from pg_qualstats(); -[ RECORD 1 ]-----+----------- userid | 10 dbid | 32799 lrelid | 189341 lattnum | 2 opno | 417 rrelid | rattnum | qualid | uniquequalid | qualnodeid | 1391544855 uniquequalnodeid | 551979005 occurences | 1 execution_count | 31 nbfiltered | 0 min_err_estimate_ratio | 32.741935483871 max_err_estimate_ratio | 32.741935483871 mean_err_estimate_ratio | 32.741935483871 stddev_err_estimate_ratio | 0 min_err_estimate_num | 984 max_err_estimate_num | 984 mean_err_estimate_num | 984 stddev_err_estimate_num | 0 constant_position | 47 queryid | -6668685762776610659 constvalue | 2::integer eval_type | f */ CREATE FUNCTION @extschema@.pg_qualstats( OUT userid oid, OUT dbid oid, OUT lrelid oid, OUT lattnum smallint, OUT opno oid, OUT rrelid oid, OUT rattnum smallint, OUT qualid bigint, OUT uniquequalid bigint, OUT qualnodeid bigint, OUT uniquequalnodeid bigint, OUT occurences bigint, OUT execution_count bigint, OUT nbfiltered bigint, OUT min_err_estimate_ratio double precision, OUT max_err_estimate_ratio double precision, OUT mean_err_estimate_ratio double precision, OUT stddev_err_estimate_ratio double precision, OUT min_err_estimate_num bigint, OUT max_err_estimate_num bigint, OUT mean_err_estimate_num double precision, OUT stddev_err_estimate_num double precision, OUT constant_position int, OUT queryid bigint, OUT constvalue varchar, OUT eval_type "char" ) RETURNS SETOF record AS 'MODULE_PATHNAME', 'pg_qualstats_2_0' LANGUAGE C STRICT VOLATILE; /*""" .. function:: pg_qualstats_names() This function is the same as pg_qualstats, but with additional columns corresponding to the resolved names, if ``pg_qualstats.resolve_oids`` is set to ``true``. Returns: The same set of columns than :func:`pg_qualstats()`, plus the following ones: rolname (text): the name of the role executing the query. Corresponds to userid. 
dbname (text): the name of the database on which the query was executed. Corresponds to dbid. lrelname (text): the name of the relation on the left-hand side of the qual. Corresponds to lrelid. lattname (text): the name of the attribute (column) on the left-hand side of the qual. Corresponds to rrelid. opname (text): the name of the operator. Corresponds to opno. */ CREATE FUNCTION @extschema@.pg_qualstats_names( OUT userid oid, OUT dbid oid, OUT lrelid oid, OUT lattnum smallint, OUT opno oid, OUT rrelid oid, OUT rattnum smallint, OUT qualid bigint, OUT uniquequalid bigint, OUT qualnodeid bigint, OUT uniquequalnodeid bigint, OUT occurences bigint, OUT execution_count bigint, OUT nbfiltered bigint, OUT min_err_estimate_ratio double precision, OUT max_err_estimate_ratio double precision, OUT mean_err_estimate_ratio double precision, OUT stddev_err_estimate_ratio double precision, OUT min_err_estimate_num bigint, OUT max_err_estimate_num bigint, OUT mean_err_estimate_num double precision, OUT stddev_err_estimate_num double precision, OUT constant_position int, OUT queryid bigint, OUT constvalue varchar, OUT eval_type "char", OUT rolname text, OUT dbname text, OUT lrelname text, OUT lattname text, OUT opname text, OUT rrelname text, OUT rattname text ) RETURNS SETOF record AS 'MODULE_PATHNAME', 'pg_qualstats_names_2_0' LANGUAGE C STRICT VOLATILE; -- Register a view on the function for ease of use. /*""" .. view:: pg_qualstats This view is just a simple wrapper on the :func:`pg_qualstats()` function, filtering on the current database for convenience. */ CREATE VIEW @extschema@.pg_qualstats AS SELECT qs.* FROM @extschema@.pg_qualstats() qs INNER JOIN pg_database on qs.dbid = pg_database.oid WHERE pg_database.datname = current_database(); GRANT SELECT ON @extschema@.pg_qualstats TO PUBLIC; -- Don't want this to be available to non-superusers. REVOKE ALL ON FUNCTION @extschema@.pg_qualstats_reset() FROM PUBLIC; /*""" .. 
view:: pg_qualstats_pretty This view resolves oid "on the fly", for the current database. Returns: left_schema (name): the name of the left-hand side relation's schema. left_table (name): the name of the left-hand side relation. left_column (name): the name of the left-hand side attribute. operator (name): the name of the operator. right_schema (name): the name of the right-hand side relation's schema. right_table (name): the name of the right-hand side relation. right_column (name): the name of the operator. execution_count (bigint): the total number of time this qual was executed. nbfiltered (bigint): the total number of tuples filtered by this qual. */ CREATE VIEW @extschema@.pg_qualstats_pretty AS select nl.nspname as left_schema, al.attrelid::regclass as left_table, al.attname as left_column, opno::regoper::text as operator, nr.nspname as right_schema, ar.attrelid::regclass as right_table, ar.attname as right_column, sum(occurences) as occurences, sum(execution_count) as execution_count, sum(nbfiltered) as nbfiltered from @extschema@.pg_qualstats qs left join (pg_class cl inner join pg_namespace nl on nl.oid = cl.relnamespace) on cl.oid = qs.lrelid left join (pg_class cr inner join pg_namespace nr on nr.oid = cr.relnamespace) on cr.oid = qs.rrelid left join pg_attribute al on al.attrelid = qs.lrelid and al.attnum = qs.lattnum left join pg_attribute ar on ar.attrelid = qs.rrelid and ar.attnum = qs.rattnum group by al.attrelid, al.attname, ar.attrelid, ar.attname, opno, nl.nspname, nr.nspname ; CREATE OR REPLACE VIEW @extschema@.pg_qualstats_all AS SELECT dbid, relid, userid, queryid, array_agg(distinct attnum) as attnums, opno, max(qualid) as qualid, sum(occurences) as occurences, sum(execution_count) as execution_count, sum(nbfiltered) as nbfiltered, coalesce(qualid, qualnodeid) as qualnodeid FROM ( SELECT qs.dbid, CASE WHEN lrelid IS NOT NULL THEN lrelid WHEN rrelid IS NOT NULL THEN rrelid END as relid, qs.userid as userid, CASE WHEN lrelid IS NOT NULL THEN 
lattnum WHEN rrelid IS NOT NULL THEN rattnum END as attnum, qs.opno as opno, qs.qualid as qualid, qs.qualnodeid as qualnodeid, qs.occurences as occurences, qs.execution_count as execution_count, qs.nbfiltered as nbfiltered, qs.queryid FROM @extschema@.pg_qualstats() qs WHERE lrelid IS NOT NULL or rrelid IS NOT NULL ) t GROUP BY dbid, relid, userid, queryid, opno, coalesce(qualid, qualnodeid) ; /*""" .. type:: qual Attributes: relid (oid): the relation oid attnum (integer): the attribute number opno (oid): the operator oid eval_type (char): the evaluation type. See :func:`pg_qualstats()` for an explanation of the eval_type. */ CREATE TYPE @extschema@.qual AS ( relid oid, attnum integer, opno oid, eval_type "char" ); /*""" .. type:: qualname Pendant of :type:`qual`, but with names instead of oids Attributes: relname (text): the relation oid attname (text): the attribute number opname (text): the operator name eval_type (char): the evaluation type. See :func:`pg_qualstats()` for an explanation of the eval_type. 
*/ CREATE TYPE @extschema@.qualname AS ( relname text, attnname text, opname text, eval_type "char" ); CREATE TYPE @extschema@.adv_quals AS ( qualnodeids bigint[], queryids bigint[] ); CREATE OR REPLACE VIEW @extschema@.pg_qualstats_by_query AS SELECT coalesce(uniquequalid, uniquequalnodeid) as uniquequalnodeid, dbid, userid, coalesce(qualid, qualnodeid) as qualnodeid, occurences, execution_count, nbfiltered, queryid, array_agg(constvalue order by constant_position) as constvalues, array_agg(ROW(relid, attnum, opno, eval_type)::@extschema@.qual) as quals FROM ( SELECT qs.dbid, CASE WHEN lrelid IS NOT NULL THEN lrelid WHEN rrelid IS NOT NULL THEN rrelid END as relid, qs.userid as userid, CASE WHEN lrelid IS NOT NULL THEN lattnum WHEN rrelid IS NOT NULL THEN rattnum END as attnum, qs.opno as opno, qs.qualid as qualid, qs.uniquequalid as uniquequalid, qs.qualnodeid as qualnodeid, qs.uniquequalnodeid as uniquequalnodeid, qs.occurences as occurences, qs.execution_count as execution_count, qs.queryid as queryid, qs.constvalue as constvalue, qs.nbfiltered as nbfiltered, qs.eval_type, qs.constant_position FROM @extschema@.pg_qualstats() qs WHERE (qs.lrelid IS NULL) != (qs.rrelid IS NULL) ) i GROUP BY coalesce(uniquequalid, uniquequalnodeid), coalesce(qualid, qualnodeid), dbid, userid, occurences, execution_count, nbfiltered, queryid ; CREATE OR REPLACE FUNCTION @extschema@.pg_qualstats_deparse_qual(qual qual) RETURNS TEXT AS $_$ SELECT pg_catalog.format('%I.%I %s ?', c.oid::regclass, a.attname, o.oprname) FROM pg_catalog.pg_class c JOIN pg_catalog.pg_attribute a ON a.attrelid = c.oid JOIN pg_catalog.pg_operator o ON o.oid = qual.opno WHERE c.oid = qual.relid AND a.attnum = qual.attnum $_$ LANGUAGE sql; CREATE OR REPLACE FUNCTION @extschema@.pg_qualstats_get_qualnode_rel(bigint) RETURNS TEXT AS $_$ SELECT pg_catalog.quote_ident(n.nspname) || '.' 
|| pg_catalog.quote_ident(c.relname) FROM @extschema@.pg_qualstats() q JOIN pg_catalog.pg_class c ON coalesce(q.lrelid, q.rrelid) = c.oid JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace WHERE q.qualnodeid = $1 $_$ LANGUAGE sql; CREATE OR REPLACE FUNCTION @extschema@.pg_qualstats_get_idx_col(bigint, include_nondefault_opclass boolean = true) RETURNS TEXT AS $_$ SELECT pg_catalog.quote_ident(a.attname) || CASE WHEN include_nondefault_opclass THEN CASE WHEN opc.opcdefault THEN '' ELSE ' ' || pg_catalog.quote_ident(opcname) END ELSE '' END FROM @extschema@.pg_qualstats() q JOIN pg_catalog.pg_class c ON coalesce(q.lrelid, q.rrelid) = c.oid JOIN pg_catalog.pg_attribute a ON a.attrelid = c.oid AND a.attnum = coalesce(q.lattnum, q.rattnum) JOIN pg_catalog.pg_operator op ON op.oid = q.opno JOIN pg_catalog.pg_amop amop ON amop.amopopr = op.oid JOIN pg_catalog.pg_am am ON am.oid = amop.amopmethod JOIN pg_catalog.pg_opfamily f ON f.opfmethod = am.oid AND amop.amopfamily = f.oid JOIN pg_catalog.pg_opclass opc ON opc.opcfamily = f.oid WHERE q.qualnodeid = $1 ORDER BY CASE opcdefault WHEN true THEN 0 ELSE 1 END; $_$ LANGUAGE sql; CREATE OR REPLACE FUNCTION @extschema@.pg_qualstats_index_advisor ( min_filter integer DEFAULT 1000, min_selectivity integer DEFAULT 30, forbidden_am text[] DEFAULT '{}') RETURNS json AS $_$ DECLARE v_processed bigint[] = '{}'; v_indexes json[] = '{}'; v_unoptimised json[] = '{}'; rec record; v_nb_processed integer = 1; v_ddl text; v_col text; v_qualnodeid bigint; v_quals_todo bigint[]; v_quals_done bigint[]; v_quals_col_done text[]; v_queryids bigint[] = '{}'; BEGIN -- sanity checks and default values SELECT coalesce(min_filter, 1000), coalesce(min_selectivity, 30), coalesce(forbidden_am, '{}') INTO min_filter, min_selectivity, forbidden_am; -- don't try to generate hash indexes Before pg 10, as those are only WAL -- logged since pg 11. 
IF pg_catalog.current_setting('server_version_num')::bigint < 100000 THEN forbidden_am := array_append(forbidden_am, 'hash'); END IF; -- first find out unoptimizable quals. -- We need an array of json containing the per-qual info, and a single -- array containing all the underlying qualnodeids, so we need to create -- the wanted final object manually as we can't have two different grouping -- approach. FOR rec IN WITH src AS (SELECT DISTINCT qualnodeid, (coalesce(lrelid, rrelid), coalesce(lattnum, rattnum), opno, eval_type)::@extschema@.qual AS qual, queryid FROM @extschema@.pg_qualstats() q JOIN pg_catalog.pg_database d ON q.dbid = d.oid LEFT JOIN pg_catalog.pg_operator op ON op.oid = q.opno LEFT JOIN pg_catalog.pg_amop amop ON amop.amopopr = op.oid LEFT JOIN pg_catalog.pg_am am ON am.oid = amop.amopmethod WHERE d.datname = current_database() AND eval_type = 'f' AND coalesce(lrelid, rrelid) != 0 AND amname IS NULL ) SELECT pg_catalog.json_build_object( 'qual', @extschema@.pg_qualstats_deparse_qual(qual), -- be careful to generate an empty array if no queryid availiable 'queryids', coalesce(pg_catalog.array_agg(DISTINCT queryid) FILTER (WHERE queryid IS NOT NULL), '{}') ) AS obj, array_agg(qualnodeid) AS qualnodeids FROM src GROUP BY qual LOOP v_unoptimised := array_append(v_unoptimised, rec.obj); v_processed := array_cat(v_processed, rec.qualnodeids); END LOOP; -- The index suggestion is done in multiple iteration, by scoring for each -- relation containing interesting quals a path of possibly AND-ed quals -- that contains other possibly AND-ed quals. Only the higher score path -- will be used to create an index, so we can then compute another set of -- paths ignoring the quals that are now optimized with an index. WHILE v_nb_processed > 0 LOOP v_nb_processed := 0; FOR rec IN -- first, find quals that seems worth to optimize along with the -- possible access methods, discarding any qualnode that are marked as -- already processed. 
Also apply access method restriction. WITH pgqs AS ( SELECT dbid, amname, qualid, qualnodeid, (coalesce(lrelid, rrelid), coalesce(lattnum, rattnum), opno, eval_type)::@extschema@.qual AS qual, queryid, round(avg(execution_count)) AS execution_count, sum(occurences) AS occurences, round(sum(nbfiltered)::numeric / sum(occurences)) AS avg_filter, CASE WHEN sum(execution_count) = 0 THEN 0 ELSE round(sum(nbfiltered::numeric) / sum(execution_count) * 100) END AS avg_selectivity FROM @extschema@.pg_qualstats() q JOIN pg_catalog.pg_database d ON q.dbid = d.oid JOIN pg_catalog.pg_operator op ON op.oid = q.opno JOIN pg_catalog.pg_amop amop ON amop.amopopr = op.oid JOIN pg_catalog.pg_am am ON am.oid = amop.amopmethod WHERE d.datname = current_database() AND eval_type = 'f' AND amname != ALL (forbidden_am) AND coalesce(lrelid, rrelid) != 0 AND qualnodeid != ALL(v_processed) GROUP BY dbid, amname, qualid, qualnodeid, lrelid, rrelid, lattnum, rattnum, opno, eval_type, queryid ), -- apply cardinality and selectivity restrictions filtered AS ( SELECT (qual).relid, amname, coalesce(qualid, qualnodeid) AS parent, count(*) AS weight, (array_agg(qualnodeid), array_agg(queryid) )::@extschema@.adv_quals AS quals FROM pgqs WHERE avg_filter >= min_filter AND avg_selectivity >= min_selectivity GROUP BY (qual).relid, amname, parent ), -- for each possibly AND-ed qual, build the list of included qualnodeid nodes AS ( SELECT p.relid, p.amname, p.parent, p.quals, c.quals AS children FROM filtered p LEFT JOIN filtered c ON (p.quals).qualnodeids @> (c.quals).qualnodeids AND p.amname = c.amname AND p.parent != c.parent AND (p.quals).qualnodeids != (c.quals).qualnodeids ), -- build the "paths", which is the list of AND-ed quals that entirely -- contains another possibly AND-ed quals, and give a score for each -- path. The scoring method used here is simply the number of -- columns in the quals. 
paths AS ( SELECT DISTINCT *, coalesce(pg_catalog.array_length((children).qualnodeids, 1), 0) AS weight FROM nodes UNION SELECT DISTINCT p.relid, p.amname, p.parent, p.quals, c.children, coalesce(pg_catalog.array_length((c.children).qualnodeids, 1), 0) AS weight FROM nodes p JOIN nodes c ON (p.children).qualnodeids @> (c.quals).qualnodeids AND (c.quals).qualnodeids IS NOT NULL AND (c.quals).qualnodeids != (p.quals).qualnodeids AND p.amname = c.amname ), -- compute the final paths. -- The scoring method used here is simply the sum of total -- number of columns in each possibly AND-ed quals, so that we can -- later chose to create indexes that optimize as many queries as -- possible with as few indexes as possible. -- We also compute here an access method weight, so that we can later -- choose a btree index rather than another access method if btree is -- available. computed AS ( SELECT relid, amname, parent, quals, array_agg(to_json(children) ORDER BY weight) FILTER (WHERE children IS NOT NULL) AS included, pg_catalog.array_length((quals).qualnodeids, 1) + sum(weight) AS path_weight, CASE amname WHEN 'btree' THEN 1 ELSE 2 END AS amweight FROM paths GROUP BY relid, amname, parent, quals ), -- compute a rank for each final paths, per relation. final AS ( SELECT relid, amname, parent, quals, included, path_weight, amweight, row_number() OVER ( PARTITION BY relid ORDER BY path_weight DESC, amweight) AS rownum FROM computed ) -- and finally choose the higher rank final path for each relation. 
SELECT relid, amname, parent, (quals).qualnodeids as quals, (quals).queryids as queryids, included, path_weight FROM final WHERE rownum = 1 LOOP v_nb_processed := v_nb_processed + 1; v_ddl := ''; v_quals_todo := '{}'; v_quals_done := '{}'; v_quals_col_done := '{}'; -- put columns from included quals, if any, first for order dependency DECLARE v_cur json; BEGIN IF rec.included IS NOT NULL THEN FOREACH v_cur IN ARRAY rec.included LOOP -- Direct cast from json to bigint is only possible since pg10 FOR v_qualnodeid IN SELECT pg_catalog.json_array_elements(v_cur->'qualnodeids')::text::bigint LOOP v_quals_todo := v_quals_todo || v_qualnodeid; END LOOP; END LOOP; END IF; END; -- and append qual's own columns v_quals_todo := v_quals_todo || rec.quals; -- generate the index DDL FOREACH v_qualnodeid IN ARRAY v_quals_todo LOOP -- skip quals already present in the index CONTINUE WHEN v_quals_done @> ARRAY[v_qualnodeid]; -- skip other quals for the same column v_col := @extschema@.pg_qualstats_get_idx_col(v_qualnodeid, false); CONTINUE WHEN v_quals_col_done @> ARRAY[v_col]; -- mark this qual as present in a generated index so it's ignore at -- next round of best quals to optimize v_processed := pg_catalog.array_append(v_processed, v_qualnodeid); -- mark this qual and col as present in this index v_quals_done := v_quals_done || v_qualnodeid; v_quals_col_done := v_quals_col_done || v_col; -- if underlying table has been dropped, stop here CONTINUE WHEN coalesce(v_col, '') = ''; -- append the column to the index IF v_ddl != '' THEN v_ddl := v_ddl || ', '; END IF; v_ddl := v_ddl || @extschema@.pg_qualstats_get_idx_col(v_qualnodeid, true); END LOOP; -- if underlying table has been dropped, skip this (broken) index CONTINUE WHEN coalesce(v_ddl, '') = ''; -- generate the full CREATE INDEX ddl v_ddl = pg_catalog.format('CREATE INDEX ON %s USING %I (%s)', @extschema@.pg_qualstats_get_qualnode_rel(v_qualnodeid), rec.amname, v_ddl); -- get the underlyings queryid(s) DECLARE v_queryid 
text; v_cur json; BEGIN v_queryids = rec.queryids; IF rec.included IS NOT NULL THEN FOREACH v_cur IN ARRAY rec.included LOOP -- Direct cast from json to bigint is only possible since pg10 FOR v_queryid IN SELECT pg_catalog.json_array_elements(v_cur->'queryids')::text LOOP CONTINUE WHEN v_queryid = 'null'; v_queryids := v_queryids || v_queryid::text::bigint; END LOOP; END LOOP; END IF; END; -- remove any duplicates SELECT pg_catalog.array_agg(DISTINCT v) INTO v_queryids FROM (SELECT unnest(v_queryids)) s(v); -- sanitize the queryids IF v_queryids IS NULL OR v_queryids = '{null}' THEN v_queryids = '{}'; END IF; -- and finally append the index to the list of generated indexes v_indexes := pg_catalog.array_append(v_indexes, pg_catalog.json_build_object( 'ddl', v_ddl, 'queryids', v_queryids ) ); END LOOP; END LOOP; RETURN pg_catalog.json_build_object( 'indexes', v_indexes, 'unoptimised', v_unoptimised); END; $_$ LANGUAGE plpgsql; /* end of pg_qualstats_index_advisor */ pg_qualstats-2.1.1/pg_qualstats--2.1.1.sql000066400000000000000000000644311467511452200202030ustar00rootroot00000000000000/*""" .. function:: pg_qualstats_reset() Resets statistics gathered by pg_qualstats. */ CREATE FUNCTION @extschema@.pg_qualstats_reset() RETURNS void AS 'MODULE_PATHNAME' LANGUAGE C; /*""" .. function pg_qualstats_example_query(bigint) Returns an example for a normalized query, given its queryid */ CREATE FUNCTION @extschema@.pg_qualstats_example_query(bigint) RETURNS text AS 'MODULE_PATHNAME' LANGUAGE C; /*""" .. function pg_qualstats_example_queries() Returns all the example queries with their associated queryid */ CREATE FUNCTION @extschema@.pg_qualstats_example_queries(OUT queryid bigint, OUT query text) RETURNS SETOF record AS 'MODULE_PATHNAME' LANGUAGE C; /*""" .. 
function:: pg_qualstats() Returns: A SETOF record containing the data gathered by pg_qualstats Attributes: userid (oid): the user who executed the query dbid (oid): the database on which the query was executed lrelid (oid): oid of the relation on the left hand side lattnum (attnum): attribute number of the column on the left hand side opno (oid): oid of the operator used in the expression rrelid (oid): oid of the relation on the right hand side rattnum (attnum): attribute number of the column on the right hand side qualid(bigint): hash of the parent ``AND`` expression, if any. This is useful for identifying predicates which are used together. uniquequalid(bigint): hash of the parent ``AND`` expression, if any, including the constant values. qualnodeid(bigint): the predicate hash. uniquequalnodeid(bigint): the predicate hash. Everything (down to constants) is used to compute this hash occurences (bigint): the number of times this predicate has been seen execution_count (bigint): the total number of execution of this predicate. 
nbfiltered (bigint): the number of lines filtered by this predicate min_err_estimate_ratio(double precision): the minimum selectivity estimation error ratio for this predicate max_err_estimate_ratio(double precision): the maximum selectivity estimation error ratio for this predicate mean_err_estimate_ratio(double precision): the mean selectivity estimation error ratio for this predicate stddev_err_estimate_ratio(double precision): the standard deviation for selectivity estimation error ratio for this predicate min_err_estimate_num(bigint): the minimum number of line for selectivity estimation error for this predicate max_err_estimate_num(bigint): the maximum number of line for selectivity estimation error for this predicate mean_err_estimate_num(double precision): the mean number of line for selectivity estimation error for this predicate stddev_err_estimate_num(double precision): the standard deviation for number of line for selectivity estimation error for this predicate constant_position (int): the position of the constant in the original query, as filled by the lexer. queryid (bigint): the queryid identifying this query, as generated by pg_stat_statements constvalue (varchar): a string representation of the right-hand side constant, if any, truncated to 80 bytes. eval_type (char): the evaluation type. Possible values are ``f`` for execution as a filter (ie, after a Scan) or ``i`` if it was evaluated as an index predicate. If the qual is evaluated as an index predicate, then the nbfiltered value will most likely be 0, except if there was any rechecked conditions. Example: .. 
code-block:: sql powa=# select * from powa_statements where queryid != 2; powa=# select * from pg_qualstats(); -[ RECORD 1 ]-----+----------- userid | 10 dbid | 32799 lrelid | 189341 lattnum | 2 opno | 417 rrelid | rattnum | qualid | uniquequalid | qualnodeid | 1391544855 uniquequalnodeid | 551979005 occurences | 1 execution_count | 31 nbfiltered | 0 min_err_estimate_ratio | 32.741935483871 max_err_estimate_ratio | 32.741935483871 mean_err_estimate_ratio | 32.741935483871 stddev_err_estimate_ratio | 0 min_err_estimate_num | 984 max_err_estimate_num | 984 mean_err_estimate_num | 984 stddev_err_estimate_num | 0 constant_position | 47 queryid | -6668685762776610659 constvalue | 2::integer eval_type | f */ CREATE FUNCTION @extschema@.pg_qualstats( OUT userid oid, OUT dbid oid, OUT lrelid oid, OUT lattnum smallint, OUT opno oid, OUT rrelid oid, OUT rattnum smallint, OUT qualid bigint, OUT uniquequalid bigint, OUT qualnodeid bigint, OUT uniquequalnodeid bigint, OUT occurences bigint, OUT execution_count bigint, OUT nbfiltered bigint, OUT min_err_estimate_ratio double precision, OUT max_err_estimate_ratio double precision, OUT mean_err_estimate_ratio double precision, OUT stddev_err_estimate_ratio double precision, OUT min_err_estimate_num bigint, OUT max_err_estimate_num bigint, OUT mean_err_estimate_num double precision, OUT stddev_err_estimate_num double precision, OUT constant_position int, OUT queryid bigint, OUT constvalue varchar, OUT eval_type "char" ) RETURNS SETOF record AS 'MODULE_PATHNAME', 'pg_qualstats_2_0' LANGUAGE C STRICT VOLATILE; /*""" .. function:: pg_qualstats_names() This function is the same as pg_qualstats, but with additional columns corresponding to the resolved names, if ``pg_qualstats.resolve_oids`` is set to ``true``. Returns: The same set of columns than :func:`pg_qualstats()`, plus the following ones: rolname (text): the name of the role executing the query. Corresponds to userid. 
dbname (text): the name of the database on which the query was executed. Corresponds to dbid. lrelname (text): the name of the relation on the left-hand side of the qual. Corresponds to lrelid. lattname (text): the name of the attribute (column) on the left-hand side of the qual. Corresponds to rrelid. opname (text): the name of the operator. Corresponds to opno. */ CREATE FUNCTION @extschema@.pg_qualstats_names( OUT userid oid, OUT dbid oid, OUT lrelid oid, OUT lattnum smallint, OUT opno oid, OUT rrelid oid, OUT rattnum smallint, OUT qualid bigint, OUT uniquequalid bigint, OUT qualnodeid bigint, OUT uniquequalnodeid bigint, OUT occurences bigint, OUT execution_count bigint, OUT nbfiltered bigint, OUT min_err_estimate_ratio double precision, OUT max_err_estimate_ratio double precision, OUT mean_err_estimate_ratio double precision, OUT stddev_err_estimate_ratio double precision, OUT min_err_estimate_num bigint, OUT max_err_estimate_num bigint, OUT mean_err_estimate_num double precision, OUT stddev_err_estimate_num double precision, OUT constant_position int, OUT queryid bigint, OUT constvalue varchar, OUT eval_type "char", OUT rolname text, OUT dbname text, OUT lrelname text, OUT lattname text, OUT opname text, OUT rrelname text, OUT rattname text ) RETURNS SETOF record AS 'MODULE_PATHNAME', 'pg_qualstats_names_2_0' LANGUAGE C STRICT VOLATILE; -- Register a view on the function for ease of use. /*""" .. view:: pg_qualstats This view is just a simple wrapper on the :func:`pg_qualstats()` function, filtering on the current database for convenience. */ CREATE VIEW @extschema@.pg_qualstats AS SELECT qs.* FROM @extschema@.pg_qualstats() qs INNER JOIN pg_database on qs.dbid = pg_database.oid WHERE pg_database.datname = current_database(); GRANT SELECT ON @extschema@.pg_qualstats TO PUBLIC; -- Don't want this to be available to non-superusers. REVOKE ALL ON FUNCTION @extschema@.pg_qualstats_reset() FROM PUBLIC; /*""" .. 
view:: pg_qualstats_pretty This view resolves oid "on the fly", for the current database. Returns: left_schema (name): the name of the left-hand side relation's schema. left_table (name): the name of the left-hand side relation. left_column (name): the name of the left-hand side attribute. operator (name): the name of the operator. right_schema (name): the name of the right-hand side relation's schema. right_table (name): the name of the right-hand side relation. right_column (name): the name of the operator. execution_count (bigint): the total number of time this qual was executed. nbfiltered (bigint): the total number of tuples filtered by this qual. */ CREATE VIEW @extschema@.pg_qualstats_pretty AS select nl.nspname as left_schema, al.attrelid::regclass as left_table, al.attname as left_column, opno::regoper::text as operator, nr.nspname as right_schema, ar.attrelid::regclass as right_table, ar.attname as right_column, sum(occurences) as occurences, sum(execution_count) as execution_count, sum(nbfiltered) as nbfiltered from @extschema@.pg_qualstats qs left join (pg_class cl inner join pg_namespace nl on nl.oid = cl.relnamespace) on cl.oid = qs.lrelid left join (pg_class cr inner join pg_namespace nr on nr.oid = cr.relnamespace) on cr.oid = qs.rrelid left join pg_attribute al on al.attrelid = qs.lrelid and al.attnum = qs.lattnum left join pg_attribute ar on ar.attrelid = qs.rrelid and ar.attnum = qs.rattnum group by al.attrelid, al.attname, ar.attrelid, ar.attname, opno, nl.nspname, nr.nspname ; CREATE OR REPLACE VIEW @extschema@.pg_qualstats_all AS SELECT dbid, relid, userid, queryid, array_agg(distinct attnum) as attnums, opno, max(qualid) as qualid, sum(occurences) as occurences, sum(execution_count) as execution_count, sum(nbfiltered) as nbfiltered, coalesce(qualid, qualnodeid) as qualnodeid FROM ( SELECT qs.dbid, CASE WHEN lrelid IS NOT NULL THEN lrelid WHEN rrelid IS NOT NULL THEN rrelid END as relid, qs.userid as userid, CASE WHEN lrelid IS NOT NULL THEN 
lattnum WHEN rrelid IS NOT NULL THEN rattnum END as attnum, qs.opno as opno, qs.qualid as qualid, qs.qualnodeid as qualnodeid, qs.occurences as occurences, qs.execution_count as execution_count, qs.nbfiltered as nbfiltered, qs.queryid FROM @extschema@.pg_qualstats() qs WHERE lrelid IS NOT NULL or rrelid IS NOT NULL ) t GROUP BY dbid, relid, userid, queryid, opno, coalesce(qualid, qualnodeid) ; /*""" .. type:: qual Attributes: relid (oid): the relation oid attnum (integer): the attribute number opno (oid): the operator oid eval_type (char): the evaluation type. See :func:`pg_qualstats()` for an explanation of the eval_type. */ CREATE TYPE @extschema@.qual AS ( relid oid, attnum integer, opno oid, eval_type "char" ); /*""" .. type:: qualname Pendant of :type:`qual`, but with names instead of oids Attributes: relname (text): the relation oid attname (text): the attribute number opname (text): the operator name eval_type (char): the evaluation type. See :func:`pg_qualstats()` for an explanation of the eval_type. 
*/ CREATE TYPE @extschema@.qualname AS ( relname text, attnname text, opname text, eval_type "char" ); CREATE TYPE @extschema@.adv_quals AS ( qualnodeids bigint[], queryids bigint[] ); CREATE OR REPLACE VIEW @extschema@.pg_qualstats_by_query AS SELECT coalesce(uniquequalid, uniquequalnodeid) as uniquequalnodeid, dbid, userid, coalesce(qualid, qualnodeid) as qualnodeid, occurences, execution_count, nbfiltered, queryid, array_agg(constvalue order by constant_position) as constvalues, array_agg(ROW(relid, attnum, opno, eval_type)::@extschema@.qual) as quals FROM ( SELECT qs.dbid, CASE WHEN lrelid IS NOT NULL THEN lrelid WHEN rrelid IS NOT NULL THEN rrelid END as relid, qs.userid as userid, CASE WHEN lrelid IS NOT NULL THEN lattnum WHEN rrelid IS NOT NULL THEN rattnum END as attnum, qs.opno as opno, qs.qualid as qualid, qs.uniquequalid as uniquequalid, qs.qualnodeid as qualnodeid, qs.uniquequalnodeid as uniquequalnodeid, qs.occurences as occurences, qs.execution_count as execution_count, qs.queryid as queryid, qs.constvalue as constvalue, qs.nbfiltered as nbfiltered, qs.eval_type, qs.constant_position FROM @extschema@.pg_qualstats() qs WHERE (qs.lrelid IS NULL) != (qs.rrelid IS NULL) ) i GROUP BY coalesce(uniquequalid, uniquequalnodeid), coalesce(qualid, qualnodeid), dbid, userid, occurences, execution_count, nbfiltered, queryid ; CREATE OR REPLACE FUNCTION @extschema@.pg_qualstats_deparse_qual(qual qual) RETURNS TEXT AS $_$ SELECT pg_catalog.format('%I.%I %s ?', c.oid::regclass, a.attname, o.oprname) FROM pg_catalog.pg_class c JOIN pg_catalog.pg_attribute a ON a.attrelid = c.oid JOIN pg_catalog.pg_operator o ON o.oid = qual.opno WHERE c.oid = qual.relid AND a.attnum = qual.attnum $_$ LANGUAGE sql; CREATE OR REPLACE FUNCTION @extschema@.pg_qualstats_get_qualnode_rel(bigint) RETURNS TEXT AS $_$ SELECT pg_catalog.quote_ident(n.nspname) || '.' 
|| pg_catalog.quote_ident(c.relname) FROM @extschema@.pg_qualstats() q JOIN pg_catalog.pg_class c ON coalesce(q.lrelid, q.rrelid) = c.oid JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace WHERE q.qualnodeid = $1 $_$ LANGUAGE sql; CREATE OR REPLACE FUNCTION @extschema@.pg_qualstats_get_idx_col(bigint, include_nondefault_opclass boolean = true) RETURNS TEXT AS $_$ SELECT pg_catalog.quote_ident(a.attname) || CASE WHEN include_nondefault_opclass THEN CASE WHEN opc.opcdefault THEN '' ELSE ' ' || pg_catalog.quote_ident(opcname) END ELSE '' END FROM @extschema@.pg_qualstats() q JOIN pg_catalog.pg_class c ON coalesce(q.lrelid, q.rrelid) = c.oid JOIN pg_catalog.pg_attribute a ON a.attrelid = c.oid AND a.attnum = coalesce(q.lattnum, q.rattnum) JOIN pg_catalog.pg_operator op ON op.oid = q.opno JOIN pg_catalog.pg_amop amop ON amop.amopopr = op.oid JOIN pg_catalog.pg_am am ON am.oid = amop.amopmethod JOIN pg_catalog.pg_opfamily f ON f.opfmethod = am.oid AND amop.amopfamily = f.oid JOIN pg_catalog.pg_opclass opc ON opc.opcfamily = f.oid WHERE q.qualnodeid = $1 ORDER BY CASE opcdefault WHEN true THEN 0 ELSE 1 END; $_$ LANGUAGE sql; CREATE OR REPLACE FUNCTION @extschema@.pg_qualstats_index_advisor ( min_filter integer DEFAULT 1000, min_selectivity integer DEFAULT 30, forbidden_am text[] DEFAULT '{}') RETURNS json AS $_$ DECLARE v_processed bigint[] = '{}'; v_indexes json[] = '{}'; v_unoptimised json[] = '{}'; rec record; v_nb_processed integer = 1; v_ddl text; v_col text; v_qualnodeid bigint; v_quals_todo bigint[]; v_quals_done bigint[]; v_quals_col_done text[]; v_queryids bigint[] = '{}'; BEGIN -- sanity checks and default values SELECT coalesce(min_filter, 1000), coalesce(min_selectivity, 30), coalesce(forbidden_am, '{}') INTO min_filter, min_selectivity, forbidden_am; -- don't try to generate hash indexes Before pg 10, as those are only WAL -- logged since pg 11. 
IF pg_catalog.current_setting('server_version_num')::bigint < 100000 THEN forbidden_am := array_append(forbidden_am, 'hash'); END IF; -- first find out unoptimizable quals. -- We need an array of json containing the per-qual info, and a single -- array containing all the underlying qualnodeids, so we need to create -- the wanted final object manually as we can't have two different grouping -- approach. FOR rec IN WITH src AS (SELECT DISTINCT qualnodeid, (coalesce(lrelid, rrelid), coalesce(lattnum, rattnum), opno, eval_type)::@extschema@.qual AS qual, queryid FROM @extschema@.pg_qualstats() q JOIN pg_catalog.pg_database d ON q.dbid = d.oid LEFT JOIN pg_catalog.pg_operator op ON op.oid = q.opno LEFT JOIN pg_catalog.pg_amop amop ON amop.amopopr = op.oid LEFT JOIN pg_catalog.pg_am am ON am.oid = amop.amopmethod WHERE d.datname = current_database() AND eval_type = 'f' AND coalesce(lrelid, rrelid) != 0 AND amname IS NULL ) SELECT pg_catalog.json_build_object( 'qual', @extschema@.pg_qualstats_deparse_qual(qual), -- be careful to generate an empty array if no queryid availiable 'queryids', coalesce(pg_catalog.array_agg(DISTINCT queryid) FILTER (WHERE queryid IS NOT NULL), '{}') ) AS obj, array_agg(qualnodeid) AS qualnodeids FROM src GROUP BY qual LOOP v_unoptimised := array_append(v_unoptimised, rec.obj); v_processed := array_cat(v_processed, rec.qualnodeids); END LOOP; -- The index suggestion is done in multiple iteration, by scoring for each -- relation containing interesting quals a path of possibly AND-ed quals -- that contains other possibly AND-ed quals. Only the higher score path -- will be used to create an index, so we can then compute another set of -- paths ignoring the quals that are now optimized with an index. WHILE v_nb_processed > 0 LOOP v_nb_processed := 0; FOR rec IN -- first, find quals that seems worth to optimize along with the -- possible access methods, discarding any qualnode that are marked as -- already processed. 
Also apply access method restriction. WITH pgqs AS ( SELECT dbid, amname, qualid, qualnodeid, (coalesce(lrelid, rrelid), coalesce(lattnum, rattnum), opno, eval_type)::@extschema@.qual AS qual, queryid, round(avg(execution_count)) AS execution_count, sum(occurences) AS occurences, round(sum(nbfiltered)::numeric / sum(occurences)) AS avg_filter, CASE WHEN sum(execution_count) = 0 THEN 0 ELSE round(sum(nbfiltered::numeric) / sum(execution_count) * 100) END AS avg_selectivity FROM @extschema@.pg_qualstats() q JOIN pg_catalog.pg_database d ON q.dbid = d.oid JOIN pg_catalog.pg_operator op ON op.oid = q.opno JOIN pg_catalog.pg_amop amop ON amop.amopopr = op.oid JOIN pg_catalog.pg_am am ON am.oid = amop.amopmethod WHERE d.datname = current_database() AND eval_type = 'f' AND amname != ALL (forbidden_am) AND coalesce(lrelid, rrelid) != 0 AND qualnodeid != ALL(v_processed) GROUP BY dbid, amname, qualid, qualnodeid, lrelid, rrelid, lattnum, rattnum, opno, eval_type, queryid ), -- apply cardinality and selectivity restrictions filtered AS ( SELECT (qual).relid, amname, coalesce(qualid, qualnodeid) AS parent, count(*) AS weight, (array_agg(DISTINCT qualnodeid), array_agg(queryid) )::@extschema@.adv_quals AS quals FROM pgqs WHERE avg_filter >= min_filter AND avg_selectivity >= min_selectivity GROUP BY (qual).relid, amname, parent ), -- for each possibly AND-ed qual, build the list of included qualnodeid nodes AS ( SELECT p.relid, p.amname, p.parent, p.quals, c.quals AS children FROM filtered p LEFT JOIN filtered c ON (p.quals).qualnodeids @> (c.quals).qualnodeids AND p.amname = c.amname AND p.parent != c.parent AND (p.quals).qualnodeids != (c.quals).qualnodeids ), -- build the "paths", which is the list of AND-ed quals that entirely -- contains another possibly AND-ed quals, and give a score for each -- path. The scoring method used here is simply the number of -- columns in the quals. 
paths AS ( SELECT DISTINCT *, coalesce(pg_catalog.array_length((children).qualnodeids, 1), 0) AS weight FROM nodes UNION SELECT DISTINCT p.relid, p.amname, p.parent, p.quals, c.children, coalesce(pg_catalog.array_length((c.children).qualnodeids, 1), 0) AS weight FROM nodes p JOIN nodes c ON (p.children).qualnodeids @> (c.quals).qualnodeids AND (c.quals).qualnodeids IS NOT NULL AND (c.quals).qualnodeids != (p.quals).qualnodeids AND p.amname = c.amname ), -- compute the final paths. -- The scoring method used here is simply the sum of total -- number of columns in each possibly AND-ed quals, so that we can -- later chose to create indexes that optimize as many queries as -- possible with as few indexes as possible. -- We also compute here an access method weight, so that we can later -- choose a btree index rather than another access method if btree is -- available. computed AS ( SELECT relid, amname, parent, quals, array_agg(to_json(children) ORDER BY weight) FILTER (WHERE children IS NOT NULL) AS included, pg_catalog.array_length((quals).qualnodeids, 1) + sum(weight) AS path_weight, CASE amname WHEN 'btree' THEN 1 ELSE 2 END AS amweight FROM paths GROUP BY relid, amname, parent, quals ), -- compute a rank for each final paths, per relation. final AS ( SELECT relid, amname, parent, quals, included, path_weight, amweight, row_number() OVER ( PARTITION BY relid ORDER BY path_weight DESC, amweight) AS rownum FROM computed ) -- and finally choose the higher rank final path for each relation. 
SELECT relid, amname, parent, (quals).qualnodeids as quals, (quals).queryids as queryids, included, path_weight FROM final WHERE rownum = 1 LOOP v_nb_processed := v_nb_processed + 1; v_ddl := ''; v_quals_todo := '{}'; v_quals_done := '{}'; v_quals_col_done := '{}'; -- put columns from included quals, if any, first for order dependency DECLARE v_cur json; BEGIN IF rec.included IS NOT NULL THEN FOR v_cur IN SELECT v->'qualnodeids' FROM (SELECT * FROM unnest(rec.included)) AS r(v) ORDER BY pg_catalog.json_array_length(v->'qualnodeids') ASC LOOP -- Direct cast from json to bigint is only possible since pg10 FOR v_qualnodeid IN SELECT pg_catalog.json_array_elements(v_cur)::text::bigint LOOP v_quals_todo := v_quals_todo || v_qualnodeid; END LOOP; END LOOP; END IF; END; -- and append qual's own columns v_quals_todo := v_quals_todo || rec.quals; -- generate the index DDL FOREACH v_qualnodeid IN ARRAY v_quals_todo LOOP -- skip quals already present in the index CONTINUE WHEN v_quals_done @> ARRAY[v_qualnodeid]; -- skip other quals for the same column v_col := @extschema@.pg_qualstats_get_idx_col(v_qualnodeid, false); CONTINUE WHEN v_quals_col_done @> ARRAY[v_col]; -- mark this qual as present in a generated index so it's ignore at -- next round of best quals to optimize v_processed := pg_catalog.array_append(v_processed, v_qualnodeid); -- mark this qual and col as present in this index v_quals_done := v_quals_done || v_qualnodeid; v_quals_col_done := v_quals_col_done || v_col; -- if underlying table has been dropped, stop here CONTINUE WHEN coalesce(v_col, '') = ''; -- append the column to the index IF v_ddl != '' THEN v_ddl := v_ddl || ', '; END IF; v_ddl := v_ddl || @extschema@.pg_qualstats_get_idx_col(v_qualnodeid, true); END LOOP; -- if underlying table has been dropped, skip this (broken) index CONTINUE WHEN coalesce(v_ddl, '') = ''; -- generate the full CREATE INDEX ddl v_ddl = pg_catalog.format('CREATE INDEX ON %s USING %I (%s)', 
@extschema@.pg_qualstats_get_qualnode_rel(v_qualnodeid), rec.amname, v_ddl); -- get the underlyings queryid(s) DECLARE v_queryid text; v_cur json; BEGIN v_queryids = rec.queryids; IF rec.included IS NOT NULL THEN FOREACH v_cur IN ARRAY rec.included LOOP -- Direct cast from json to bigint is only possible since pg10 FOR v_queryid IN SELECT pg_catalog.json_array_elements(v_cur->'queryids')::text LOOP CONTINUE WHEN v_queryid = 'null'; v_queryids := v_queryids || v_queryid::text::bigint; END LOOP; END LOOP; END IF; END; -- remove any duplicates SELECT pg_catalog.array_agg(DISTINCT v) INTO v_queryids FROM (SELECT unnest(v_queryids)) s(v); -- sanitize the queryids IF v_queryids IS NULL OR v_queryids = '{null}' THEN v_queryids = '{}'; END IF; -- and finally append the index to the list of generated indexes v_indexes := pg_catalog.array_append(v_indexes, pg_catalog.json_build_object( 'ddl', v_ddl, 'queryids', v_queryids ) ); END LOOP; END LOOP; RETURN pg_catalog.json_build_object( 'indexes', v_indexes, 'unoptimised', v_unoptimised); END; $_$ LANGUAGE plpgsql; /* end of pg_qualstats_index_advisor */ pg_qualstats-2.1.1/pg_qualstats.c000066400000000000000000002117011467511452200171060ustar00rootroot00000000000000/*------------------------------------------------------------------------- * * pg_qualstats.c * Track frequently used quals. * * This extension works by installing a hooks on executor. * The ExecutorStart hook will enable some instrumentation for the * queries (INSTRUMENT_ROWS and INSTRUMENT_BUFFERS). * * The ExecutorEnd hook will look for every qual in the query, and * stores the quals of the form: * - EXPR OPERATOR CONSTANT * - EXPR OPERATOR EXPR * * If pg_stat_statements is available, the statistics will be * aggregated by queryid, and a not-normalized statement will be * stored for each different queryid. This can allow third part tools * to do some work on a real query easily. 
* * The implementation is heavily inspired by pg_stat_statements * * Copyright (c) 2014,2017 Ronan Dunklau * Copyright (c) 2018-2024, The Powa-Team *------------------------------------------------------------------------- */ #include #include #include "postgres.h" #include "access/hash.h" #include "access/htup_details.h" #if PG_VERSION_NUM >= 90600 #include "access/parallel.h" #endif #if PG_VERSION_NUM >= 100000 && PG_VERSION_NUM < 110000 #include "catalog/pg_authid.h" #endif #if PG_VERSION_NUM >= 110000 #include "catalog/pg_authid_d.h" #endif #include "catalog/pg_class.h" #include "catalog/pg_namespace.h" #include "catalog/pg_operator.h" #include "catalog/pg_type.h" #include "commands/dbcommands.h" #if PG_VERSION_NUM >= 150000 #include "common/pg_prng.h" #endif #include "fmgr.h" #include "funcapi.h" #include "mb/pg_wchar.h" #include "miscadmin.h" #include "nodes/execnodes.h" #include "nodes/nodeFuncs.h" #include "nodes/makefuncs.h" #include "optimizer/clauses.h" #include "optimizer/planner.h" #include "parser/analyze.h" #include "parser/parse_node.h" #include "parser/parsetree.h" #if PG_VERSION_NUM >= 150000 #include "postmaster/autovacuum.h" #endif #include "postmaster/postmaster.h" #if PG_VERSION_NUM >= 150000 #include "replication/walsender.h" #endif #include "storage/ipc.h" #include "storage/lwlock.h" #if PG_VERSION_NUM >= 100000 #include "storage/shmem.h" #endif #include "utils/array.h" #include "utils/builtins.h" #include "utils/guc.h" #include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/tuplestore.h" PG_MODULE_MAGIC; #define PGQS_NAME_COLUMNS 7 /* number of column added when using * pg_qualstats_column SRF */ #define PGQS_USAGE_DEALLOC_PERCENT 5 /* free this % of entries at once */ #define PGQS_MAX_DEFAULT 1000 /* default pgqs_max value */ #define PGQS_MAX_LOCAL_ENTRIES (pgqs_max * 0.2) /* do not track more of * 20% of possible entries * in shared mem */ #define PGQS_CONSTANT_SIZE 80 /* Truncate constant representation at 80 */ #define 
PGQS_FLAGS (INSTRUMENT_ROWS|INSTRUMENT_BUFFERS) #define PGQS_RATIO 0 #define PGQS_NUM 1 #define PGQS_LWL_ACQUIRE(lock, mode) if (!pgqs_backend) { \ LWLockAcquire(lock, mode); \ } #define PGQS_LWL_RELEASE(lock) if (!pgqs_backend) { \ LWLockRelease(lock); \ } #if PG_VERSION_NUM < 170000 #define MyProcNumber MyBackendId #define ParallelLeaderProcNumber ParallelLeaderBackendId #endif #if PG_VERSION_NUM < 140000 #define ParallelLeaderBackendId ParallelMasterBackendId #endif /* * Extension version number, for supporting older extension versions' objects */ typedef enum pgqsVersion { PGQS_V1_0 = 0, PGQS_V2_0 } pgqsVersion; /*---- Function declarations ----*/ extern PGDLLEXPORT void _PG_init(void); extern PGDLLEXPORT Datum pg_qualstats_reset(PG_FUNCTION_ARGS); extern PGDLLEXPORT Datum pg_qualstats(PG_FUNCTION_ARGS); extern PGDLLEXPORT Datum pg_qualstats_2_0(PG_FUNCTION_ARGS); extern PGDLLEXPORT Datum pg_qualstats_names(PG_FUNCTION_ARGS); extern PGDLLEXPORT Datum pg_qualstats_names_2_0(PG_FUNCTION_ARGS); static Datum pg_qualstats_common(PG_FUNCTION_ARGS, pgqsVersion api_version, bool include_names); extern PGDLLEXPORT Datum pg_qualstats_example_query(PG_FUNCTION_ARGS); extern PGDLLEXPORT Datum pg_qualstats_example_queries(PG_FUNCTION_ARGS); PG_FUNCTION_INFO_V1(pg_qualstats_reset); PG_FUNCTION_INFO_V1(pg_qualstats); PG_FUNCTION_INFO_V1(pg_qualstats_2_0); PG_FUNCTION_INFO_V1(pg_qualstats_names); PG_FUNCTION_INFO_V1(pg_qualstats_names_2_0); PG_FUNCTION_INFO_V1(pg_qualstats_example_query); PG_FUNCTION_INFO_V1(pg_qualstats_example_queries); static void pgqs_backend_mode_startup(void); #if PG_VERSION_NUM >= 150000 static void pgqs_shmem_request(void); #endif static void pgqs_shmem_startup(void); static void pgqs_ExecutorStart(QueryDesc *queryDesc, int eflags); static void pgqs_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction, #if PG_VERSION_NUM >= 90600 uint64 count #else long count #endif #if PG_VERSION_NUM >= 100000 , bool execute_once #endif ); static void 
pgqs_ExecutorFinish(QueryDesc *queryDesc); static void pgqs_ExecutorEnd(QueryDesc *queryDesc); static ExecutorStart_hook_type prev_ExecutorStart = NULL; static ExecutorRun_hook_type prev_ExecutorRun = NULL; static ExecutorFinish_hook_type prev_ExecutorFinish = NULL; static ExecutorEnd_hook_type prev_ExecutorEnd = NULL; #if PG_VERSION_NUM >= 150000 static shmem_request_hook_type prev_shmem_request_hook = NULL; #endif static shmem_startup_hook_type prev_shmem_startup_hook = NULL; static uint32 pgqs_hash_fn(const void *key, Size keysize); #if PG_VERSION_NUM < 90500 static uint32 pgqs_uint32_hashfn(const void *key, Size keysize); #endif static bool pgqs_backend = false; static int pgqs_query_size; static int pgqs_max = PGQS_MAX_DEFAULT; /* max # statements to track */ static bool pgqs_track_pgcatalog; /* track queries on pg_catalog */ static bool pgqs_resolve_oids; /* resolve oids */ static bool pgqs_enabled; static bool pgqs_track_constants; static double pgqs_sample_rate; static int pgqs_min_err_ratio; static int pgqs_min_err_num; static int query_is_sampled; /* Is the current query sampled, per backend */ static int nesting_level = 0; /* Current nesting depth of ExecutorRun calls */ static bool pgqs_assign_sample_rate_check_hook(double *newval, void **extra, GucSource source); #if PG_VERSION_NUM > 90600 static void pgqs_set_query_sampled(bool sample); #endif static bool pgqs_is_query_sampled(void); /*---- Data structures declarations ----*/ typedef struct pgqsSharedState { #if PG_VERSION_NUM >= 90400 LWLock *lock; /* protects counters hashtable * search/modification */ LWLock *querylock; /* protects query hashtable * search/modification */ #else LWLockId lock; /* protects counters hashtable * search/modification */ LWLockId querylock; /* protects query hashtable * search/modification */ #endif #if PG_VERSION_NUM >= 90600 LWLock *sampledlock; /* protects sampled array search/modification */ bool sampled[FLEXIBLE_ARRAY_MEMBER]; /* should we sample this * query? 
 */
#endif
} pgqsSharedState;

/* Since cff440d368, queryid becomes a uint64 internally. */
#if PG_VERSION_NUM >= 110000
typedef uint64 pgqs_queryid;
#else
typedef uint32 pgqs_queryid;
#endif

/*
 * Hash key uniquely identifying one tracked qual: the (user, database,
 * query, qual hashes, evaluation type) tuple.
 */
typedef struct pgqsHashKey
{
	Oid			userid;			/* user OID */
	Oid			dbid;			/* database OID */
	pgqs_queryid queryid;		/* query identifier (if set by another plugin) */
	uint32		uniquequalnodeid;	/* Hash of the const */
	uint32		uniquequalid;	/* Hash of the parent, including the consts */
	char		evaltype;		/* Evaluation type. Can be 'f' to mean a qual
								 * executed after a scan, or 'i' for an
								 * indexqual */
} pgqsHashKey;

/*
 * Resolved object names for one entry; only filled when
 * pg_qualstats.resolve_oids is enabled.
 */
typedef struct pgqsNames
{
	NameData	rolname;		/* role name */
	NameData	datname;		/* database name */
	NameData	lrelname;		/* LHS relation name */
	NameData	lattname;		/* LHS attribute name */
	NameData	opname;			/* operator name */
	NameData	rrelname;		/* RHS relation name */
	NameData	rattname;		/* RHS attribute name */
} pgqsNames;

/* Counters accumulated for one qual, keyed by pgqsHashKey. */
typedef struct pgqsEntry
{
	pgqsHashKey key;
	Oid			lrelid;			/* LHS relation OID or NULL if not var */
	AttrNumber	lattnum;		/* LHS attribute Number or NULL if not var */
	Oid			opoid;			/* Operator OID */
	Oid			rrelid;			/* RHS relation OID or NULL if not var */
	AttrNumber	rattnum;		/* RHS attribute Number or NULL if not var */
	char		constvalue[PGQS_CONSTANT_SIZE]; /* Textual representation of
												 * the right hand constant, if
												 * any */
	uint32		qualid;			/* Hash of the parent AND expression if any, 0
								 * otherwise. */
	uint32		qualnodeid;		/* Hash of the node itself */
	int64		count;			/* # of operator execution */
	int64		nbfiltered;		/* # of lines discarded by the operator */
	int			position;		/* content position in query text */
	double		usage;			/* # of qual execution, used for deallocation */
	double		min_err_estim[2];	/* min estimation error ratio and num */
	double		max_err_estim[2];	/* max estimation error ratio and num */
	double		mean_err_estim[2];	/* mean estimation error ratio and num */
	double		sum_err_estim[2];	/* sum of variances in estimation error
									 * ratio and num */
	int64		occurences;		/* # of qual execution, 1 per query */
} pgqsEntry;

/* Entry variant carrying resolved names (pg_qualstats.resolve_oids). */
typedef struct pgqsEntryWithNames
{
	pgqsEntry	entry;
	pgqsNames	names;
} pgqsEntryWithNames;

/* Key for the query-example hash: one example text per queryid. */
typedef struct pgqsQueryStringHashKey
{
	pgqs_queryid queryid;
} pgqsQueryStringHashKey;

typedef struct pgqsQueryStringEntry
{
	pgqsQueryStringHashKey key;

	/*
	 * Imperatively at the end of the struct.  This is actually of length
	 * query_size, which is track_activity_query_size.
	 */
	char		querytext[1];
} pgqsQueryStringEntry;

/*
 * Transient state of the query tree walker - for the meaning of the counters,
 * see pgqsEntry comments.
 */
typedef struct pgqsWalkerContext
{
	pgqs_queryid queryId;
	List	   *rtable;			/* range table of the plan being walked */
	PlanState  *planstate;
	PlanState  *inner_planstate;
	PlanState  *outer_planstate;
	List	   *outer_tlist;
	List	   *inner_tlist;
	List	   *index_tlist;
	uint32		qualid;
	uint32		uniquequalid;	/* Hash of the parent, including the consts */
	int64		count;
	int64		nbfiltered;
	double		err_estim[2];
	int			nentries;		/* number of entries found so far */
	char		evaltype;
	const char *querytext;
} pgqsWalkerContext;

static bool pgqs_whereclause_tree_walker(Node *node, pgqsWalkerContext *query);
static pgqsEntry *pgqs_process_opexpr(OpExpr *expr, pgqsWalkerContext *context);
static pgqsEntry *pgqs_process_scalararrayopexpr(ScalarArrayOpExpr *expr, pgqsWalkerContext *context);
static pgqsEntry *pgqs_process_booltest(BooleanTest *expr, pgqsWalkerContext *context);
static void pgqs_collectNodeStats(PlanState *planstate, List *ancestors, pgqsWalkerContext *context);
static void pgqs_collectMemberNodeStats(int nplans, PlanState **planstates, List *ancestors, pgqsWalkerContext *context);
static void pgqs_collectSubPlanStats(List *plans, List *ancestors, pgqsWalkerContext *context);
static uint32 hashExpr(Expr *expr, pgqsWalkerContext *context, bool include_const);
static void exprRepr(Expr *expr, StringInfo buffer, pgqsWalkerContext *context, bool include_const);
static void pgqs_set_planstates(PlanState *planstate, pgqsWalkerContext *context);
static Expr *pgqs_resolve_var(Var *var, pgqsWalkerContext *context);
static void pgqs_entry_dealloc(void);
static inline void pgqs_entry_init(pgqsEntry *entry);
static inline void pgqs_entry_copy_raw(pgqsEntry *dest, pgqsEntry *src);
static void pgqs_entry_err_estim(pgqsEntry *e, double *err_estim, int64 occurences);
static void pgqs_queryentry_dealloc(void);
static void pgqs_localentry_dealloc(int nvictims);
static void pgqs_fillnames(pgqsEntryWithNames *entry);
static Size pgqs_memsize(void);
#if PG_VERSION_NUM >= 90600
static Size pgqs_sampled_array_size(void);
#endif

/* Global Hash */
static
HTAB *pgqs_hash = NULL;
static HTAB *pgqs_query_examples_hash = NULL;
static pgqsSharedState *pgqs = NULL;

/* Local Hash */
static HTAB *pgqs_localhash = NULL;

/*
 * Module load callback: install executor hooks, define GUCs and, when loaded
 * via shared_preload_libraries, register shared memory requirements.  When
 * loaded in a regular backend, fall back to backend-local (per-process) mode.
 */
void
_PG_init(void)
{
	if (!process_shared_preload_libraries_in_progress)
	{
		elog(WARNING, "Without shared_preload_libraries, only current backend stats will be available.");
		pgqs_backend = true;
	}
	else
	{
		pgqs_backend = false;
#if PG_VERSION_NUM >= 150000
		prev_shmem_request_hook = shmem_request_hook;
		shmem_request_hook = pgqs_shmem_request;
#endif
		prev_shmem_startup_hook = shmem_startup_hook;
		shmem_startup_hook = pgqs_shmem_startup;
	}

	/* Chain behind any previously-installed executor hooks. */
	prev_ExecutorStart = ExecutorStart_hook;
	ExecutorStart_hook = pgqs_ExecutorStart;
	prev_ExecutorRun = ExecutorRun_hook;
	ExecutorRun_hook = pgqs_ExecutorRun;
	prev_ExecutorFinish = ExecutorFinish_hook;
	ExecutorFinish_hook = pgqs_ExecutorFinish;
	prev_ExecutorEnd = ExecutorEnd_hook;
	ExecutorEnd_hook = pgqs_ExecutorEnd;

	DefineCustomBoolVariable("pg_qualstats.enabled",
							 "Enable / Disable pg_qualstats",
							 NULL,
							 &pgqs_enabled,
							 true,
							 PGC_USERSET,
							 0,
							 NULL,
							 NULL,
							 NULL);
	DefineCustomBoolVariable("pg_qualstats.track_constants",
							 "Enable / Disable pg_qualstats constants tracking",
							 NULL,
							 &pgqs_track_constants,
							 true,
							 PGC_USERSET,
							 0,
							 NULL,
							 NULL,
							 NULL);
	/* Sizing GUC: only changeable at postmaster start in shared mode. */
	DefineCustomIntVariable("pg_qualstats.max",
							"Sets the maximum number of statements tracked by pg_qualstats.",
							NULL,
							&pgqs_max,
							PGQS_MAX_DEFAULT,
							100,
							INT_MAX,
							pgqs_backend ? PGC_USERSET : PGC_POSTMASTER,
							0,
							NULL,
							NULL,
							NULL);
	if (!pgqs_backend)
		DefineCustomBoolVariable("pg_qualstats.resolve_oids",
								 "Store names alongside the oid. Eats MUCH more space!",
								 NULL,
								 &pgqs_resolve_oids,
								 false,
								 PGC_POSTMASTER,
								 0,
								 NULL,
								 NULL,
								 NULL);
	DefineCustomBoolVariable("pg_qualstats.track_pg_catalog",
							 "Track quals on system catalogs too.",
							 NULL,
							 &pgqs_track_pgcatalog,
							 false,
							 PGC_USERSET,
							 0,
							 NULL,
							 NULL,
							 NULL);
	/* -1 means "1 / MaxConnections", fixed up by the check hook below. */
	DefineCustomRealVariable("pg_qualstats.sample_rate",
							 "Sampling rate. 1 means every query, 0.2 means 1 in five queries",
							 NULL,
							 &pgqs_sample_rate,
							 -1,
							 -1,
							 1,
							 PGC_USERSET,
							 0,
							 pgqs_assign_sample_rate_check_hook,
							 NULL,
							 NULL);
	DefineCustomIntVariable("pg_qualstats.min_err_estimate_ratio",
							"Error estimation ratio threshold to save quals",
							NULL,
							&pgqs_min_err_ratio,
							0,
							0,
							INT_MAX,
							PGC_USERSET,
							0,
							NULL,
							NULL,
							NULL);
	DefineCustomIntVariable("pg_qualstats.min_err_estimate_num",
							"Error estimation num threshold to save quals",
							NULL,
							&pgqs_min_err_num,
							0,
							0,
							INT_MAX,
							PGC_USERSET,
							0,
							NULL,
							NULL,
							NULL);

	EmitWarningsOnPlaceholders("pg_qualstats");

	/* Query-example entries are sized from track_activity_query_size. */
	parse_int(GetConfigOption("track_activity_query_size", false, false),
			  &pgqs_query_size, 0, NULL);

	if (!pgqs_backend)
	{
		/* On PG15+ the shmem request is done in pgqs_shmem_request instead. */
#if PG_VERSION_NUM < 150000
		RequestAddinShmemSpace(pgqs_memsize());
#if PG_VERSION_NUM >= 90600
		RequestNamedLWLockTranche("pg_qualstats", 3);
#else
		RequestAddinLWLocks(2);
#endif							/* pg9.6+ */
#endif							/* pg15- */
	}
	else
		pgqs_backend_mode_startup();
}

/*
 * Check that the sample ratio is in the correct interval
 */
static bool
pgqs_assign_sample_rate_check_hook(double *newval, void **extra, GucSource source)
{
	double		val = *newval;

	if ((val < 0 && val != -1) || (val > 1))
		return false;
	if (val == -1)
		*newval = 1.
/ MaxConnections;
	return true;
}

#if PG_VERSION_NUM >= 90600
/*
 * Publish the leader's sampling decision in shared memory so that parallel
 * workers can read it (workers do not run the sampling code themselves).
 */
static void
pgqs_set_query_sampled(bool sample)
{
	/* the decisions should only be made in leader */
	Assert(!IsParallelWorker());

	/* not supported in backend mode */
	if (pgqs_backend)
		return;

	/*
	 * in worker processes we need to get the info from shared memory
	 *
	 * NOTE(review): this uses LWLockAcquire directly while the reader side
	 * uses the PGQS_LWL_ACQUIRE macro — presumably equivalent here since we
	 * bail out early in backend mode; confirm against the macro definition.
	 */
	LWLockAcquire(pgqs->sampledlock, LW_EXCLUSIVE);
	pgqs->sampled[MyProcNumber] = sample;
	LWLockRelease(pgqs->sampledlock);
}
#endif

/*
 * Return whether the current query was chosen by the sampling logic.  In a
 * parallel worker, the answer comes from shared memory; in the leader it is
 * the local query_is_sampled flag.
 */
static bool
pgqs_is_query_sampled(void)
{
#if PG_VERSION_NUM >= 90600
	bool		sampled;

	/* in leader we can just check the global variable */
	if (!IsParallelWorker())
		return query_is_sampled;

	/* not supported in backend mode */
	if (pgqs_backend)
		return false;

	/* in worker processes we need to get the info from shared memory */
	PGQS_LWL_ACQUIRE(pgqs->sampledlock, LW_SHARED);
	sampled = pgqs->sampled[ParallelLeaderProcNumber];
	PGQS_LWL_RELEASE(pgqs->sampledlock);

	return sampled;
#else
	return query_is_sampled;
#endif
}

/*
 * Do catalog search to replace oids with corresponding objects name
 *
 * NOTE(review): declared static in the prototype above but defined without
 * static here; the static declaration prevails — confirm this is intended.
 */
void
pgqs_fillnames(pgqsEntryWithNames *entry)
{
	/* get_attname gained a missing_ok argument in PG11 */
#if PG_VERSION_NUM >= 110000
#define GET_ATTNAME(r, a) get_attname(r, a, false)
#else
#define GET_ATTNAME(r, a) get_attname(r, a)
#endif

#if PG_VERSION_NUM >= 90500
	namestrcpy(&(entry->names.rolname), GetUserNameFromId(entry->entry.key.userid, true));
#else
	namestrcpy(&(entry->names.rolname), GetUserNameFromId(entry->entry.key.userid));
#endif
	namestrcpy(&(entry->names.datname), get_database_name(entry->entry.key.dbid));
	if (entry->entry.lrelid != InvalidOid)
	{
		namestrcpy(&(entry->names.lrelname), get_rel_name(entry->entry.lrelid));
		namestrcpy(&(entry->names.lattname),
				   GET_ATTNAME(entry->entry.lrelid, entry->entry.lattnum));
	}
	if (entry->entry.opoid != InvalidOid)
		namestrcpy(&(entry->names.opname), get_opname(entry->entry.opoid));
	if (entry->entry.rrelid != InvalidOid)
	{
		namestrcpy(&(entry->names.rrelname), get_rel_name(entry->entry.rrelid));
		namestrcpy(&(entry->names.rattname),
				   GET_ATTNAME(entry->entry.rrelid, entry->entry.rattnum));
	}
#undef GET_ATTNAME
}

/*
 * Request rows and buffers instrumentation if pgqs is enabled
 */
static void
pgqs_ExecutorStart(QueryDesc *queryDesc, int eflags)
{
	/* Setup instrumentation */
	if (pgqs_enabled)
	{
		/*
		 * For rate sampling, randomly choose top-level statement. Either all
		 * nested statements will be explained or none will.
		 */
		if (nesting_level == 0
#if PG_VERSION_NUM >= 90600
			&& (!IsParallelWorker())
#endif
			)
		{
#if PG_VERSION_NUM >= 150000
			query_is_sampled = (pg_prng_double(&pg_global_prng_state) < pgqs_sample_rate);
#else
			query_is_sampled = (random() <= (MAX_RANDOM_VALUE * pgqs_sample_rate));
#endif
#if PG_VERSION_NUM >= 90600
			pgqs_set_query_sampled(query_is_sampled);
#endif
		}

		if (pgqs_is_query_sampled())
			queryDesc->instrument_options |= PGQS_FLAGS;
	}
	if (prev_ExecutorStart)
		prev_ExecutorStart(queryDesc, eflags);
	else
		standard_ExecutorStart(queryDesc, eflags);
}

/*
 * ExecutorRun hook: all we need do is track nesting depth
 */
static void
pgqs_ExecutorRun(QueryDesc *queryDesc, ScanDirection direction,
#if PG_VERSION_NUM >= 90600
				 uint64 count
#else
				 long count
#endif
#if PG_VERSION_NUM >= 100000
				 ,bool execute_once
#endif
)
{
	nesting_level++;
	PG_TRY();
	{
		if (prev_ExecutorRun)
#if PG_VERSION_NUM >= 100000
			prev_ExecutorRun(queryDesc, direction, count, execute_once);
#else
			prev_ExecutorRun(queryDesc, direction, count);
#endif
		else
#if PG_VERSION_NUM >= 100000
			standard_ExecutorRun(queryDesc, direction, count, execute_once);
#else
			standard_ExecutorRun(queryDesc, direction, count);
#endif
		nesting_level--;
	}
	PG_CATCH();
	{
		/* keep the depth consistent even when the query errors out */
		nesting_level--;
		PG_RE_THROW();
	}
	PG_END_TRY();
}

/*
 * ExecutorFinish hook: all we need do is track nesting depth
 */
static void
pgqs_ExecutorFinish(QueryDesc *queryDesc)
{
	nesting_level++;
	PG_TRY();
	{
		if (prev_ExecutorFinish)
			prev_ExecutorFinish(queryDesc);
		else
			standard_ExecutorFinish(queryDesc);
		nesting_level--;
	}
	PG_CATCH();
	{
		/* keep the depth consistent even when the query errors out */
		nesting_level--;
		PG_RE_THROW();
	}
	PG_END_TRY();
}

/*
 * Save a non
normalized query for the queryid if no one already exists, and
 * do all the stat collecting job
 */
static void
pgqs_ExecutorEnd(QueryDesc *queryDesc)
{
	pgqsQueryStringHashKey queryKey;
	bool		found;

	if ((pgqs || pgqs_backend) && pgqs_enabled && pgqs_is_query_sampled()
#if PG_VERSION_NUM >= 90600
		&& (!IsParallelWorker())
#endif

	/*
	 * multiple ExecutorStart/ExecutorEnd can be interleaved, so when sampling
	 * is activated there's no guarantee that pgqs_is_query_sampled() will
	 * only detect queries that were actually sampled (thus having the
	 * required instrumentation set up). To avoid such cases, we double check
	 * that we have the required instrumentation set up. That won't exactly
	 * detect the sampled queries, but that should be close enough and avoid
	 * adding too much complexity.
	 */
		&& (queryDesc->instrument_options & PGQS_FLAGS) == PGQS_FLAGS
		)
	{
		HASHCTL		info;
		pgqsEntry  *localentry;
		HASH_SEQ_STATUS local_hash_seq;

		/* We need to switch to the per-query memory context */
		MemoryContext oldcxt = MemoryContextSwitchTo(queryDesc->estate->es_query_cxt);
		pgqsWalkerContext *context = palloc(sizeof(pgqsWalkerContext));

		context->queryId = queryDesc->plannedstmt->queryId;
		context->rtable = queryDesc->plannedstmt->rtable;
		context->count = 0;
		context->qualid = 0;
		context->uniquequalid = 0;
		context->nbfiltered = 0;
		context->evaltype = 0;
		context->nentries = 0;
		context->querytext = queryDesc->sourceText;
		queryKey.queryid = context->queryId;

		/* keep an unormalized query example for each queryid if needed */
		if (pgqs_track_constants)
		{
			/* Lookup the hash table entry with a shared lock. */
			PGQS_LWL_ACQUIRE(pgqs->querylock, LW_SHARED);
			hash_search_with_hash_value(pgqs_query_examples_hash, &queryKey,
										context->queryId,
										HASH_FIND, &found);

			/* Create the new entry if not present */
			if (!found)
			{
				pgqsQueryStringEntry *queryEntry;
				bool		excl_found;

				/* Need exclusive lock to add a new hashtable entry - promote */
				PGQS_LWL_RELEASE(pgqs->querylock);
				PGQS_LWL_ACQUIRE(pgqs->querylock, LW_EXCLUSIVE);

				/* evict old example queries to make room for the new one */
				while (hash_get_num_entries(pgqs_query_examples_hash) >= pgqs_max)
					pgqs_queryentry_dealloc();

				queryEntry = (pgqsQueryStringEntry *)
					hash_search_with_hash_value(pgqs_query_examples_hash,
												&queryKey,
												context->queryId,
												HASH_ENTER, &excl_found);

				/* Make sure it wasn't added by another backend */
				if (!excl_found)
					strncpy(queryEntry->querytext, context->querytext, pgqs_query_size);
			}
			PGQS_LWL_RELEASE(pgqs->querylock);
		}

		/* create local hash table if it hasn't been created yet */
		if (!pgqs_localhash)
		{
			memset(&info, 0, sizeof(info));
			info.keysize = sizeof(pgqsHashKey);
			if (pgqs_resolve_oids)
				info.entrysize = sizeof(pgqsEntryWithNames);
			else
				info.entrysize = sizeof(pgqsEntry);
			info.hash = pgqs_hash_fn;
			pgqs_localhash = hash_create("pgqs_localhash", 50, &info,
										 HASH_ELEM | HASH_FUNCTION);
		}

		/* retrieve quals informations, main work starts from here */
		pgqs_collectNodeStats(queryDesc->planstate, NIL, context);

		/* if any quals found, store them in shared memory */
		if (context->nentries)
		{
			/*
			 * Before acquiring exlusive lwlock, check if there's enough room
			 * to store local hash.  Also, do not remove more than 20% of
			 * maximum number of entries in shared memory (wether they are
			 * used or not). This should not happen since we shouldn't store
			 * that much entries in localhash in the first place.
			 */
			int			nvictims = hash_get_num_entries(pgqs_localhash) -
				PGQS_MAX_LOCAL_ENTRIES;

			if (nvictims > 0)
				pgqs_localentry_dealloc(nvictims);

			PGQS_LWL_ACQUIRE(pgqs->lock, LW_EXCLUSIVE);

			/* evict shared entries until the local batch fits */
			while (hash_get_num_entries(pgqs_hash) +
				   hash_get_num_entries(pgqs_localhash) >= pgqs_max)
				pgqs_entry_dealloc();

			hash_seq_init(&local_hash_seq, pgqs_localhash);
			while ((localentry = hash_seq_search(&local_hash_seq)) != NULL)
			{
				pgqsEntry  *newEntry = (pgqsEntry *) hash_search(pgqs_hash,
																 &localentry->key,
																 HASH_ENTER, &found);

				if (!found)
				{
					/* raw copy the local entry */
					pgqs_entry_copy_raw(newEntry, localentry);
				}
				else
				{
					/* only update counters value */
					newEntry->count += localentry->count;
					newEntry->nbfiltered += localentry->nbfiltered;
					newEntry->usage += localentry->usage;
					/* compute estimation error min, max, mean and variance */
					pgqs_entry_err_estim(newEntry, localentry->mean_err_estim,
										 localentry->occurences);
				}
				/* cleanup local hash */
				hash_search(pgqs_localhash, &localentry->key, HASH_REMOVE, NULL);
			}
			PGQS_LWL_RELEASE(pgqs->lock);
		}
		MemoryContextSwitchTo(oldcxt);
	}
	if (prev_ExecutorEnd)
		prev_ExecutorEnd(queryDesc);
	else
		standard_ExecutorEnd(queryDesc);
}

/*
 * qsort comparator for sorting into increasing usage order
 */
static int
entry_cmp(const void *lhs, const void *rhs)
{
	double		l_usage = (*(pgqsEntry *const *) lhs)->usage;
	double		r_usage = (*(pgqsEntry *const *) rhs)->usage;

	if (l_usage < r_usage)
		return -1;
	else if (l_usage > r_usage)
		return +1;
	else
		return 0;
}

/*
 * Deallocate least used entries.
 * Caller must hold an exlusive lock on pgqs->lock
 */
static void
pgqs_entry_dealloc(void)
{
	HASH_SEQ_STATUS hash_seq;
	pgqsEntry **entries;
	pgqsEntry  *entry;
	int			nvictims;
	int			i;
	int			base_size;

	/*
	 * Sort entries by usage and deallocate PGQS_USAGE_DEALLOC_PERCENT of
	 * them. While we're scanning the table, apply the decay factor to the
	 * usage values.
	 * pgqs_resolve_oids is irrelevant here as the array stores pointers
	 * instead of entries.
The struct member used for the sort are part of * pgqsEntry. */ base_size = sizeof(pgqsEntry *); entries = palloc(hash_get_num_entries(pgqs_hash) * base_size); i = 0; hash_seq_init(&hash_seq, pgqs_hash); while ((entry = hash_seq_search(&hash_seq)) != NULL) { entries[i++] = entry; entry->usage *= 0.99; } qsort(entries, i, base_size, entry_cmp); nvictims = Max(10, i * PGQS_USAGE_DEALLOC_PERCENT / 100); nvictims = Min(nvictims, i); for (i = 0; i < nvictims; i++) hash_search(pgqs_hash, &entries[i]->key, HASH_REMOVE, NULL); pfree(entries); } /* Initialize all non-key fields of the given entry. */ static inline void pgqs_entry_init(pgqsEntry *entry) { /* Note that pgqsNames if needed will be explicitly filled after this */ memset(&(entry->lrelid), 0, sizeof(pgqsEntry) - sizeof(pgqsHashKey)); } /* Copy non-key and non-name fields from the given entry */ static inline void pgqs_entry_copy_raw(pgqsEntry *dest, pgqsEntry *src) { /* Note that pgqsNames if needed will be explicitly filled after this */ memcpy(&(dest->lrelid), &(src->lrelid), (sizeof(pgqsEntry) - sizeof(pgqsHashKey))); } /* * Accurately compute estimation error ratio and num variance using Welford's * method. See * Also maintain min and max values. */ static void pgqs_entry_err_estim(pgqsEntry *e, double *err_estim, int64 occurences) { int i; e->occurences += occurences; for (i = 0; i < 2; i++) { if ((e->occurences - occurences) == 0) { e->min_err_estim[i] = err_estim[i]; e->max_err_estim[i] = err_estim[i]; e->mean_err_estim[i] = err_estim[i]; } else { double old_err = e->mean_err_estim[i]; e->mean_err_estim[i] += (err_estim[i] - old_err) / e->occurences; e->sum_err_estim[i] += (err_estim[i] - old_err) * (err_estim[i] - e->mean_err_estim[i]); } /* calculate min/max counters */ if (e->min_err_estim[i] > err_estim[i]) e->min_err_estim[i] = err_estim[i]; if (e->max_err_estim[i] < err_estim[i]) e->max_err_estim[i] = err_estim[i]; } } /* * Deallocate the first example query. 
* Caller must hold an exlusive lock on pgqs->querylock */ static void pgqs_queryentry_dealloc(void) { HASH_SEQ_STATUS hash_seq; pgqsQueryStringEntry *entry; hash_seq_init(&hash_seq, pgqs_query_examples_hash); entry = hash_seq_search(&hash_seq); if (entry != NULL) { hash_search_with_hash_value(pgqs_query_examples_hash, &entry->key, entry->key.queryid, HASH_REMOVE, NULL); hash_seq_term(&hash_seq); } } /* * Remove the requested number of entries from pgqs_localhash. Since the * entries are all coming from the same query, remove them without any specific * sort. */ static void pgqs_localentry_dealloc(int nvictims) { pgqsEntry *localentry; HASH_SEQ_STATUS local_hash_seq; pgqsHashKey **victims; bool need_seq_term = true; int i, ptr = 0; if (nvictims <= 0) return; victims = palloc(sizeof(pgqsHashKey *) * nvictims); hash_seq_init(&local_hash_seq, pgqs_localhash); while (nvictims-- >= 0) { localentry = hash_seq_search(&local_hash_seq); /* check if caller required too many victims */ if (!localentry) { need_seq_term = false; break; } victims[ptr++] = &localentry->key; } if (need_seq_term) hash_seq_term(&local_hash_seq); for (i = 0; i < ptr; i++) hash_search(pgqs_localhash, victims[i], HASH_REMOVE, NULL); pfree(victims); } static void pgqs_collectNodeStats(PlanState *planstate, List *ancestors, pgqsWalkerContext *context) { Plan *plan = planstate->plan; Instrumentation *instrument = planstate->instrument; int64 oldcount = context->count; double oldfiltered = context->nbfiltered; double old_err_ratio = context->err_estim[PGQS_RATIO]; double old_err_num = context->err_estim[PGQS_NUM]; double total_filtered = 0; ListCell *lc; List *parent = 0; List *indexquals = 0; List *quals = 0; context->planstate = planstate; /* * We have to forcibly clean up the instrumentation state because we * haven't done ExecutorEnd yet. This is pretty grotty ... 
 */
	if (instrument)
		InstrEndLoop(instrument);

	/* Retrieve the generic quals and indexquals */
	switch (nodeTag(plan))
	{
		case T_IndexOnlyScan:
			indexquals = ((IndexOnlyScan *) plan)->indexqual;
			quals = plan->qual;
			break;
		case T_IndexScan:
			indexquals = ((IndexScan *) plan)->indexqualorig;
			quals = plan->qual;
			break;
		case T_BitmapIndexScan:
			indexquals = ((BitmapIndexScan *) plan)->indexqualorig;
			quals = plan->qual;
			break;
		case T_CteScan:
		case T_SeqScan:
		case T_BitmapHeapScan:
		case T_TidScan:
		case T_SubqueryScan:
		case T_FunctionScan:
		case T_ValuesScan:
		case T_WorkTableScan:
		case T_ForeignScan:
		case T_ModifyTable:
			quals = plan->qual;
			break;
		case T_NestLoop:
			quals = ((NestLoop *) plan)->join.joinqual;
			break;
		case T_MergeJoin:
			quals = ((MergeJoin *) plan)->mergeclauses;
			break;
		case T_HashJoin:
			quals = ((HashJoin *) plan)->hashclauses;
			break;
		default:
			break;
	}

	pgqs_set_planstates(planstate, context);
	parent = list_union(indexquals, quals);

	/* multiple quals on this node form an implicit AND: hash the parent */
	if (list_length(parent) > 1)
	{
		context->uniquequalid = hashExpr((Expr *) parent, context, true);
		context->qualid = hashExpr((Expr *) parent, context, false);
	}

	/*
	 * NOTE(review): instrument was NULL-checked above but is dereferenced
	 * unconditionally from here on — presumably the PGQS_FLAGS
	 * instrumentation requested in ExecutorStart guarantees it is set;
	 * confirm.
	 */
	total_filtered = instrument->nfiltered1 + instrument->nfiltered2;
	context->nbfiltered = total_filtered;
	context->count = instrument->tuplecount + instrument->ntuples + total_filtered;

	/* compare planner row estimate vs actual rows for this node */
	if (plan->plan_rows == instrument->ntuples)
	{
		context->err_estim[PGQS_RATIO] = 0;
		context->err_estim[PGQS_NUM] = 0;
	}
	else if (plan->plan_rows > instrument->ntuples)
	{
		/* XXX should use a bigger value? */
		if (instrument->ntuples == 0)
			context->err_estim[PGQS_RATIO] = plan->plan_rows * 1.0L;
		else
			context->err_estim[PGQS_RATIO] = plan->plan_rows * 1.0L / instrument->ntuples;

		context->err_estim[PGQS_NUM] = plan->plan_rows - instrument->ntuples;
	}
	else
	{
		/* plan_rows cannot be zero */
		context->err_estim[PGQS_RATIO] = instrument->ntuples * 1.0L / plan->plan_rows;
		context->err_estim[PGQS_NUM] = instrument->ntuples - plan->plan_rows;
	}

	/* only record quals whose estimation error passes both thresholds */
	if (context->err_estim[PGQS_RATIO] >= pgqs_min_err_ratio &&
		context->err_estim[PGQS_NUM] >= pgqs_min_err_num)
	{
		/* Add the indexquals */
		context->evaltype = 'i';
		expression_tree_walker((Node *) indexquals,
							   pgqs_whereclause_tree_walker, context);

		/* Add the generic quals */
		context->evaltype = 'f';
		expression_tree_walker((Node *) quals, pgqs_whereclause_tree_walker,
							   context);
	}

	/* restore the caller's counters before recursing */
	context->qualid = 0;
	context->uniquequalid = 0;
	context->count = oldcount;
	context->nbfiltered = oldfiltered;
	context->err_estim[PGQS_RATIO] = old_err_ratio;
	context->err_estim[PGQS_NUM] = old_err_num;

	foreach(lc, planstate->initPlan)
	{
		SubPlanState *sps = (SubPlanState *) lfirst(lc);

		pgqs_collectNodeStats(sps->planstate, ancestors, context);
	}

	/* lefttree */
	if (outerPlanState(planstate))
		pgqs_collectNodeStats(outerPlanState(planstate), ancestors, context);

	/* righttree */
	if (innerPlanState(planstate))
		pgqs_collectNodeStats(innerPlanState(planstate), ancestors, context);

	/* special child plans */
	switch (nodeTag(plan))
	{
#if PG_VERSION_NUM < 140000
		case T_ModifyTable:
			pgqs_collectMemberNodeStats(((ModifyTableState *) planstate)->mt_nplans,
										((ModifyTableState *) planstate)->mt_plans,
										ancestors, context);
			break;
#endif
		case T_Append:
			pgqs_collectMemberNodeStats(((AppendState *) planstate)->as_nplans,
										((AppendState *) planstate)->appendplans,
										ancestors, context);
			break;
		case T_MergeAppend:
			pgqs_collectMemberNodeStats(((MergeAppendState *) planstate)->ms_nplans,
										((MergeAppendState *) planstate)->mergeplans,
										ancestors, context);
			break;
		case T_BitmapAnd:
			pgqs_collectMemberNodeStats(((BitmapAndState *) planstate)->nplans,
										((BitmapAndState *) planstate)->bitmapplans,
										ancestors, context);
			break;
		case T_BitmapOr:
			pgqs_collectMemberNodeStats(((BitmapOrState *) planstate)->nplans,
										((BitmapOrState *) planstate)->bitmapplans,
										ancestors, context);
			break;
		case T_SubqueryScan:
			pgqs_collectNodeStats(((SubqueryScanState *) planstate)->subplan,
								  ancestors, context);
			break;
		default:
			break;
	}

	/* subPlan-s */
	if (planstate->subPlan)
		pgqs_collectSubPlanStats(planstate->subPlan, ancestors, context);
}

/* Walk an array of member plan states (Append, BitmapAnd, ...). */
static void
pgqs_collectMemberNodeStats(int nplans, PlanState **planstates,
							List *ancestors, pgqsWalkerContext *context)
{
	int			i;

	for (i = 0; i < nplans; i++)
		pgqs_collectNodeStats(planstates[i], ancestors, context);
}

/* Walk a list of SubPlanState nodes. */
static void
pgqs_collectSubPlanStats(List *plans, List *ancestors, pgqsWalkerContext *context)
{
	ListCell   *lst;

	foreach(lst, plans)
	{
		SubPlanState *sps = (SubPlanState *) lfirst(lst);

		pgqs_collectNodeStats(sps->planstate, ancestors, context);
	}
}

/*
 * Handle "col op ANY/ALL (array)": rewrite it as a plain OpExpr and multiply
 * the execution count by the array length.
 */
static pgqsEntry *
pgqs_process_scalararrayopexpr(ScalarArrayOpExpr *expr, pgqsWalkerContext *context)
{
	OpExpr	   *op = makeNode(OpExpr);
	int			len = 0;
	pgqsEntry  *entry = NULL;
	Expr	   *array = lsecond(expr->args);

	op->opno = expr->opno;
	op->opfuncid = expr->opfuncid;
	op->inputcollid = expr->inputcollid;
	op->opresulttype = BOOLOID;
	op->args = expr->args;
	switch (array->type)
	{
		case T_ArrayExpr:
			len = list_length(((ArrayExpr *) array)->elements);
			break;
		case T_Const:
			/* Const is an array.
 */
			{
				Const	   *arrayconst = (Const *) array;
				ArrayType  *array_type;

				if (arrayconst->constisnull)
					return NULL;

				array_type = DatumGetArrayTypeP(arrayconst->constvalue);

				if (ARR_NDIM(array_type) > 0)
					len = ARR_DIMS(array_type)[0];
			}
			break;
		default:
			break;
	}

	if (len > 0)
	{
		/* each array element counts as one operator execution */
		context->count *= len;
		entry = pgqs_process_opexpr(op, context);
	}

	return entry;
}

/*
 * Track a BooleanTest qual (IS TRUE / IS NOT FALSE ...) by mapping it to an
 * equivalent "var = const" / "var <> const" entry.
 */
static pgqsEntry *
pgqs_process_booltest(BooleanTest *expr, pgqsWalkerContext *context)
{
	pgqsHashKey key;
	pgqsEntry  *entry;
	bool		found;
	Var		   *var;
	Expr	   *newexpr = NULL;
	char	   *constant;
	Oid			opoid;
	RangeTblEntry *rte;

	/* do not store more than 20% of possible entries in shared mem */
	if (context->nentries >= PGQS_MAX_LOCAL_ENTRIES)
		return NULL;

	if (IsA(expr->arg, Var))
		newexpr = pgqs_resolve_var((Var *) expr->arg, context);

	/* only plain Var arguments can be tracked */
	if (!(newexpr && IsA(newexpr, Var)))
		return NULL;

	var = (Var *) newexpr;
	rte = list_nth(context->rtable, var->varno - 1);
	switch (expr->booltesttype)
	{
		case IS_TRUE:
			constant = "TRUE::bool";
			opoid = BooleanEqualOperator;
			break;
		case IS_FALSE:
			constant = "FALSE::bool";
			opoid = BooleanEqualOperator;
			break;
		case IS_NOT_TRUE:
			constant = "TRUE::bool";
			opoid = BooleanNotEqualOperator;
			break;
		case IS_NOT_FALSE:
			constant = "FALSE::bool";
			opoid = BooleanNotEqualOperator;
			break;
		case IS_UNKNOWN:
			constant = "NULL::bool";
			opoid = BooleanEqualOperator;
			break;
		case IS_NOT_UNKNOWN:
			constant = "NULL::bool";
			opoid = BooleanNotEqualOperator;
			break;
		default:
			/* Bail out */
			return NULL;
	}
	memset(&key, 0, sizeof(pgqsHashKey));
	key.userid = GetUserId();
	key.dbid = MyDatabaseId;
	key.uniquequalid = context->uniquequalid;
	key.uniquequalnodeid = hashExpr((Expr *) expr, context, pgqs_track_constants);
	key.queryid = context->queryId;
	key.evaltype = context->evaltype;

	/* local hash, no lock needed */
	entry = (pgqsEntry *) hash_search(pgqs_localhash, &key, HASH_ENTER, &found);
	if (!found)
	{
		context->nentries++;
		pgqs_entry_init(entry);
		entry->qualnodeid = hashExpr((Expr *) expr, context, false);
		entry->qualid = context->qualid;
		entry->opoid = opoid;
		if (rte->rtekind == RTE_RELATION)
		{
			entry->lrelid = rte->relid;
			entry->lattnum = var->varattno;
		}
		if (pgqs_track_constants)
		{
			/* stored constants are normalized to UTF-8 */
			char	   *utf8const = (char *) pg_do_encoding_conversion((unsigned char *) constant,
																	   strlen(constant),
																	   GetDatabaseEncoding(),
																	   PG_UTF8);

			Assert(strlen(utf8const) < PGQS_CONSTANT_SIZE);
			strcpy(entry->constvalue, utf8const);
		}
		else
			memset(entry->constvalue, 0, sizeof(char) * PGQS_CONSTANT_SIZE);

		if (pgqs_resolve_oids)
			pgqs_fillnames((pgqsEntryWithNames *) entry);
	}

	entry->nbfiltered += context->nbfiltered;
	entry->count += context->count;
	entry->usage += 1;
	/* compute estimation error min, max, mean and variance */
	pgqs_entry_err_estim(entry, context->err_estim, 1);

	return entry;
}

/*
 * Append the textual representation of the given Const to buf, always
 * labelled with an explicit ::type cast so it can be reparsed.
 */
static void
get_const_expr(Const *constval, StringInfo buf)
{
	Oid			typoutput;
	bool		typIsVarlena;
	char	   *extval;

	if (constval->constisnull)
	{
		/*
		 * Always label the type of a NULL constant to prevent misdecisions
		 * about type when reparsing.
		 */
		appendStringInfoString(buf, "NULL");
		appendStringInfo(buf, "::%s",
						 format_type_with_typemod(constval->consttype,
												  constval->consttypmod));
		return;
	}

	getTypeOutputInfo(constval->consttype, &typoutput, &typIsVarlena);
	extval = OidOutputFunctionCall(typoutput, constval->constvalue);
	switch (constval->consttype)
	{
		case INT2OID:
		case INT4OID:
		case INT8OID:
		case OIDOID:
		case FLOAT4OID:
		case FLOAT8OID:
		case NUMERICOID:
			{
				/*
				 * These types are printed without quotes unless they contain
				 * values that aren't accepted by the scanner unquoted (e.g.,
				 * 'NaN'). Note that strtod() and friends might accept NaN,
				 * so we can't use that to test.
				 *
				 * In reality we only need to defend against infinity and NaN,
				 * so we need not get too crazy about pattern matching here.
				 *
				 * There is a special-case gotcha: if the constant is signed,
				 * we need to parenthesize it, else the parser might see a
				 * leading plus/minus as binding less tightly than adjacent
				 * operators --- particularly, the cast that we might attach
				 * below.
 */
				if (strspn(extval, "0123456789+-eE.") == strlen(extval))
				{
					if (extval[0] == '+' || extval[0] == '-')
						appendStringInfo(buf, "(%s)", extval);
					else
						appendStringInfoString(buf, extval);
				}
				else
					appendStringInfo(buf, "'%s'", extval);
			}
			break;
		case BITOID:
		case VARBITOID:
			appendStringInfo(buf, "B'%s'", extval);
			break;
		case BOOLOID:
			if (strcmp(extval, "t") == 0)
				appendStringInfoString(buf, "true");
			else
				appendStringInfoString(buf, "false");
			break;
		default:
			appendStringInfoString(buf, quote_literal_cstr(extval));
			break;
	}
	pfree(extval);

	/*
	 * For showtype == 0, append ::typename unless the constant will be
	 * implicitly typed as the right type when it is read in.
	 */
	appendStringInfo(buf, "::%s",
					 format_type_with_typemod(constval->consttype,
											  constval->consttypmod));
}

/*-----------
 * In order to avoid duplicated entries for sementically equivalent OpExpr,
 * this function returns a canonical version of the given OpExpr.
 *
 * For now, the only modification is for OpExpr with a Var and a Const, we
 * prefer the form:
 * Var operator Const
 * with the Var on the LHS.  If the expression in the opposite form and the
 * operator has a commutator, we'll commute it, otherwise fallback to the
 * original OpExpr with the Var on the RHS.
 * OpExpr of the form Var operator Var can still be redundant.
 */
static OpExpr *
pgqs_get_canonical_opexpr(OpExpr *expr, bool *commuted)
{
	if (commuted)
		*commuted = false;

	/* Only OpExpr with 2 arguments needs special processing. */
	if (list_length(expr->args) != 2)
		return expr;

	/* If the 1st argument is a Var, nothing is done */
	if (IsA(linitial(expr->args), Var))
		return expr;

	/* If the 2nd argument is a Var, commute the OpExpr if possible */
	if (IsA(lsecond(expr->args), Var) && OidIsValid(get_commutator(expr->opno)))
	{
		OpExpr	   *newexpr = copyObject(expr);

		CommuteOpExpr(newexpr);

		if (commuted)
			*commuted = true;

		return newexpr;
	}

	return expr;
}

/*
 * Track a binary OpExpr qual: identify Var/Const operands on each side and
 * accumulate statistics in the local hash.
 */
static pgqsEntry *
pgqs_process_opexpr(OpExpr *expr, pgqsWalkerContext *context)
{
	/* do not store more than 20% of possible entries in shared mem */
	if (context->nentries >= PGQS_MAX_LOCAL_ENTRIES)
		return NULL;

	if (list_length(expr->args) == 2)
	{
		bool		save_qual;
		Node	   *node;
		Var		   *var;
		Const	   *constant;
		Oid		   *sreliddest;
		AttrNumber *sattnumdest;
		pgqsEntry	tempentry;
		int			step;

		pgqs_entry_init(&tempentry);
		tempentry.opoid = expr->opno;

		save_qual = false;
		var = NULL;				/* will store the last Var found, if any */
		constant = NULL;		/* will store the last Constant found, if any */

		/* setup the node and LHS destination fields for the 1st argument */
		node = linitial(expr->args);
		sreliddest = &(tempentry.lrelid);
		sattnumdest = &(tempentry.lattnum);

		/* two passes: one per operand */
		for (step = 0; step < 2; step++)
		{
			if (IsA(node, RelabelType))
				node = (Node *) ((RelabelType *) node)->arg;

			if (IsA(node, Var))
				node = (Node *) pgqs_resolve_var((Var *) node, context);

			switch (node->type)
			{
				case T_Var:
					var = (Var *) node;
					{
						RangeTblEntry *rte;

						rte = list_nth(context->rtable, var->varno - 1);
						if (rte->rtekind == RTE_RELATION)
						{
							save_qual = true;
							*sreliddest = rte->relid;
							*sattnumdest = var->varattno;
						}
						else
							var = NULL;
					}
					break;
				case T_Const:
					constant = (Const *) node;
					break;
				default:
					break;
			}

			/* find the node to process for the 2nd pass */
			if (step == 0)
			{
				node = NULL;

				if (var == NULL)
				{
					bool		commuted;
					OpExpr	   *newexpr = pgqs_get_canonical_opexpr(expr, &commuted);

					/*
					 * If the OpExpr was commuted we have to use the 1st
					 * argument of the new OpExpr, and keep using the LHS as
					 * destination fields.
 */
					if (commuted)
					{
						Assert(sreliddest == &(tempentry.lrelid));
						Assert(sattnumdest == &(tempentry.lattnum));

						node = linitial(newexpr->args);
					}
				}

				/*
				 * If the 1st argument was a var, or if it wasn't and the
				 * operator couldn't be commuted, use the 2nd argument and the
				 * RHS as destination fields.
				 */
				if (node == NULL)
				{
					/* simply process the next argument */
					node = lsecond(expr->args);

					/*
					 * a Var was found and stored on the LHS, so if the next
					 * node will be stored on the RHS
					 */
					sreliddest = &(tempentry.rrelid);
					sattnumdest = &(tempentry.rattnum);
				}
			}
		}

		if (save_qual)
		{
			pgqsHashKey key;
			pgqsEntry  *entry;
			StringInfo	buf = makeStringInfo();
			bool		found;
			int			position = -1;

			/*
			 * If we don't track rels in the pg_catalog schema, lookup the
			 * schema to make sure its not pg_catalog. Otherwise, bail out.
			 */
			if (!pgqs_track_pgcatalog)
			{
				Oid			nsp;

				if (tempentry.lrelid != InvalidOid)
				{
					nsp = get_rel_namespace(tempentry.lrelid);

					Assert(OidIsValid(nsp));

					if (nsp == PG_CATALOG_NAMESPACE)
						return NULL;
				}

				if (tempentry.rrelid != InvalidOid)
				{
					nsp = get_rel_namespace(tempentry.rrelid);

					Assert(OidIsValid(nsp));

					if (nsp == PG_CATALOG_NAMESPACE)
						return NULL;
				}
			}

			if (constant != NULL && pgqs_track_constants)
			{
				get_const_expr(constant, buf);
				position = constant->location;
			}

			memset(&key, 0, sizeof(pgqsHashKey));
			key.userid = GetUserId();
			key.dbid = MyDatabaseId;
			key.uniquequalid = context->uniquequalid;
			key.uniquequalnodeid = hashExpr((Expr *) expr, context, pgqs_track_constants);
			key.queryid = context->queryId;
			key.evaltype = context->evaltype;

			/* local hash, no lock needed */
			entry = (pgqsEntry *) hash_search(pgqs_localhash, &key, HASH_ENTER, &found);
			if (!found)
			{
				char	   *utf8const;
				int			len;

				context->nentries++;

				/* raw copy the temporary entry */
				pgqs_entry_copy_raw(entry, &tempentry);
				entry->position = position;
				entry->qualnodeid = hashExpr((Expr *) expr, context, false);
				entry->qualid = context->qualid;

				utf8const = (char *) pg_do_encoding_conversion((unsigned char *) buf->data,
															   strlen(buf->data),
															   GetDatabaseEncoding(),
															   PG_UTF8);
				len = strlen(utf8const);

				/*
				 * The const value can use multibyte characters, so we need to
				 * be careful when truncating the value.  Note that we need to
				 * use PG_UTF8 encoding explicitly here, as the value was just
				 * converted to this encoding.
				 */
				len = pg_encoding_mbcliplen(PG_UTF8, utf8const, len,
											PGQS_CONSTANT_SIZE - 1);
				memcpy(entry->constvalue, utf8const, len);
				entry->constvalue[len] = '\0';

				if (pgqs_resolve_oids)
					pgqs_fillnames((pgqsEntryWithNames *) entry);
			}

			entry->nbfiltered += context->nbfiltered;
			entry->count += context->count;
			entry->usage += 1;
			/* compute estimation error min, max, mean and variance */
			pgqs_entry_err_estim(entry, context->err_estim, 1);

			return entry;
		}
	}
	return NULL;
}

/*
 * Recursive walker over a qual tree, dispatching each supported node kind to
 * its dedicated processing function.  AND nodes set the parent-qual hashes;
 * OR and NOT nodes reset them.
 */
static bool
pgqs_whereclause_tree_walker(Node *node, pgqsWalkerContext *context)
{
	if (node == NULL)
		return false;

	switch (node->type)
	{
		case T_BoolExpr:
			{
				BoolExpr   *boolexpr = (BoolExpr *) node;

				if (boolexpr->boolop == NOT_EXPR)
				{
					/* Skip, and do not keep track of the qual */
					uint32		previous_hash = context->qualid;
					uint32		previous_uniquequalnodeid = context->uniquequalid;

					context->qualid = 0;
					context->uniquequalid = 0;
					expression_tree_walker((Node *) boolexpr->args,
										   pgqs_whereclause_tree_walker, context);
					context->qualid = previous_hash;
					context->uniquequalid = previous_uniquequalnodeid;
					return false;
				}
				else if (boolexpr->boolop == OR_EXPR)
				{
					context->qualid = 0;
					context->uniquequalid = 0;
				}
				else if (boolexpr->boolop == AND_EXPR)
				{
					context->uniquequalid = hashExpr((Expr *) boolexpr,
													 context, pgqs_track_constants);
					context->qualid = hashExpr((Expr *) boolexpr, context, false);
				}
				expression_tree_walker((Node *) boolexpr->args,
									   pgqs_whereclause_tree_walker, context);
				return false;
			}
		case T_OpExpr:
			pgqs_process_opexpr((OpExpr *) node, context);
			return false;
		case T_ScalarArrayOpExpr:
			pgqs_process_scalararrayopexpr((ScalarArrayOpExpr *) node, context);
			return false;
		case T_BooleanTest:
			pgqs_process_booltest((BooleanTest *)
node, context); return false; default: expression_tree_walker(node, pgqs_whereclause_tree_walker, context); return false; } } static void pgqs_backend_mode_startup(void) { HASHCTL info; HASHCTL queryinfo; memset(&info, 0, sizeof(info)); memset(&queryinfo, 0, sizeof(queryinfo)); info.keysize = sizeof(pgqsHashKey); info.hcxt = TopMemoryContext; queryinfo.keysize = sizeof(pgqsQueryStringHashKey); queryinfo.entrysize = sizeof(pgqsQueryStringEntry) + pgqs_query_size * sizeof(char); queryinfo.hcxt = TopMemoryContext; if (pgqs_resolve_oids) info.entrysize = sizeof(pgqsEntryWithNames); else info.entrysize = sizeof(pgqsEntry); info.hash = pgqs_hash_fn; pgqs_hash = hash_create("pg_qualstatements_hash", pgqs_max, &info, HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT); pgqs_query_examples_hash = hash_create("pg_qualqueryexamples_hash", pgqs_max, &queryinfo, /* On PG > 9.5, use the HASH_BLOBS optimization for uint32 keys. */ #if PG_VERSION_NUM >= 90500 HASH_ELEM | HASH_BLOBS | HASH_CONTEXT); #else HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT); #endif } #if PG_VERSION_NUM >= 150000 static void pgqs_shmem_request(void) { if (prev_shmem_request_hook) prev_shmem_request_hook(); Assert(!pgqs_backend); RequestAddinShmemSpace(pgqs_memsize()); RequestNamedLWLockTranche("pg_qualstats", 3); } #endif static void pgqs_shmem_startup(void) { HASHCTL info; HASHCTL queryinfo; bool found; Assert(!pgqs_backend); if (prev_shmem_startup_hook) prev_shmem_startup_hook(); pgqs = NULL; LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE); pgqs = ShmemInitStruct("pg_qualstats", (sizeof(pgqsSharedState) #if PG_VERSION_NUM >= 90600 + pgqs_sampled_array_size() #endif ), &found); memset(&info, 0, sizeof(info)); memset(&queryinfo, 0, sizeof(queryinfo)); info.keysize = sizeof(pgqsHashKey); queryinfo.keysize = sizeof(pgqsQueryStringHashKey); queryinfo.entrysize = sizeof(pgqsQueryStringEntry) + pgqs_query_size * sizeof(char); if (pgqs_resolve_oids) info.entrysize = sizeof(pgqsEntryWithNames); else info.entrysize = 
sizeof(pgqsEntry); info.hash = pgqs_hash_fn; if (!found) { /* First time through ... */ #if PG_VERSION_NUM >= 90600 LWLockPadded *locks = GetNamedLWLockTranche("pg_qualstats"); pgqs->lock = &(locks[0]).lock; pgqs->querylock = &(locks[1]).lock; pgqs->sampledlock = &(locks[2]).lock; /* mark all backends as not sampled */ memset(pgqs->sampled, 0, pgqs_sampled_array_size()); #else pgqs->lock = LWLockAssign(); pgqs->querylock = LWLockAssign(); #endif } #if PG_VERSION_NUM < 90500 queryinfo.hash = pgqs_uint32_hashfn; #endif pgqs_hash = ShmemInitHash("pg_qualstatements_hash", pgqs_max, pgqs_max, &info, HASH_ELEM | HASH_FUNCTION | HASH_FIXED_SIZE); pgqs_query_examples_hash = ShmemInitHash("pg_qualqueryexamples_hash", pgqs_max, pgqs_max, &queryinfo, /* On PG > 9.5, use the HASH_BLOBS optimization for uint32 keys. */ #if PG_VERSION_NUM >= 90500 HASH_ELEM | HASH_BLOBS | HASH_FIXED_SIZE); #else HASH_ELEM | HASH_FUNCTION | HASH_FIXED_SIZE); #endif LWLockRelease(AddinShmemInitLock); } Datum pg_qualstats_reset(PG_FUNCTION_ARGS) { HASH_SEQ_STATUS hash_seq; pgqsEntry *entry; if ((!pgqs && !pgqs_backend) || !pgqs_hash) { ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("pg_qualstats must be loaded via shared_preload_libraries"))); } PGQS_LWL_ACQUIRE(pgqs->lock, LW_EXCLUSIVE); hash_seq_init(&hash_seq, pgqs_hash); while ((entry = hash_seq_search(&hash_seq)) != NULL) { hash_search(pgqs_hash, &entry->key, HASH_REMOVE, NULL); } PGQS_LWL_RELEASE(pgqs->lock); PG_RETURN_VOID(); } /* Number of output arguments (columns) for various API versions */ #define PG_QUALSTATS_COLS_V1_0 18 #define PG_QUALSTATS_COLS_V2_0 26 #define PG_QUALSTATS_COLS 26 /* maximum of above */ /* * Retrieve statement statistics. * * The SQL API of this function has changed multiple times, and will likely * do so again in future. 
To support the case where a newer version of this * loadable module is being used with an old SQL declaration of the function, * we continue to support the older API versions. For 2.0.X and later, the * expected API version is identified by embedding it in the C name of the * function. Unfortunately we weren't bright enough to do that for older * versions. */ Datum pg_qualstats_2_0(PG_FUNCTION_ARGS) { return pg_qualstats_common(fcinfo, PGQS_V2_0, false); } Datum pg_qualstats_names_2_0(PG_FUNCTION_ARGS) { return pg_qualstats_common(fcinfo, PGQS_V2_0, true); } Datum pg_qualstats(PG_FUNCTION_ARGS) { return pg_qualstats_common(fcinfo, PGQS_V1_0, false); } Datum pg_qualstats_names(PG_FUNCTION_ARGS) { return pg_qualstats_common(fcinfo, PGQS_V1_0, true); } Datum pg_qualstats_common(PG_FUNCTION_ARGS, pgqsVersion api_version, bool include_names) { ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; int nb_columns; TupleDesc tupdesc; Tuplestorestate *tupstore; MemoryContext per_query_ctx; MemoryContext oldcontext; HASH_SEQ_STATUS hash_seq; Oid userid = GetUserId(); bool is_allowed_role = false; pgqsEntry *entry; Datum *values; bool *nulls; #if PG_VERSION_NUM >= 140000 /* Superusers or members of pg_read_all_stats members are allowed */ is_allowed_role = is_member_of_role(GetUserId(), ROLE_PG_READ_ALL_STATS); #elif PG_VERSION_NUM >= 100000 /* Superusers or members of pg_read_all_stats members are allowed */ is_allowed_role = is_member_of_role(GetUserId(), DEFAULT_ROLE_READ_ALL_STATS); #else /* Superusers are allowed */ is_allowed_role = superuser(); #endif if ((!pgqs && !pgqs_backend) || !pgqs_hash) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("pg_qualstats must be loaded via shared_preload_libraries"))); /* check to see if caller supports us returning a tuplestore */ if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("set-valued function called in context that cannot accept 
a set"))); if (!(rsinfo->allowedModes & SFRM_Materialize)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("materialize mode required, but it is not " \ "allowed in this context"))); per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; oldcontext = MemoryContextSwitchTo(per_query_ctx); if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); /* Check we have the expected number of output arguments. */ switch (tupdesc->natts) { case PG_QUALSTATS_COLS_V1_0: case PG_QUALSTATS_COLS_V1_0 + PGQS_NAME_COLUMNS: if (api_version != PGQS_V1_0) elog(ERROR, "incorrect number of output arguments"); break; case PG_QUALSTATS_COLS_V2_0: case PG_QUALSTATS_COLS_V2_0 + PGQS_NAME_COLUMNS: if (api_version != PGQS_V2_0) elog(ERROR, "incorrect number of output arguments"); break; default: elog(ERROR, "incorrect number of output arguments"); } tupstore = tuplestore_begin_heap(true, false, work_mem); rsinfo->returnMode = SFRM_Materialize; rsinfo->setResult = tupstore; rsinfo->setDesc = tupdesc; PGQS_LWL_ACQUIRE(pgqs->lock, LW_SHARED); hash_seq_init(&hash_seq, pgqs_hash); if (api_version == PGQS_V1_0) nb_columns = PG_QUALSTATS_COLS_V1_0; else nb_columns = PG_QUALSTATS_COLS_V2_0; if (include_names) nb_columns += PGQS_NAME_COLUMNS; Assert(nb_columns == tupdesc->natts); values = palloc0(sizeof(Datum) * nb_columns); nulls = palloc0(sizeof(bool) * nb_columns); while ((entry = hash_seq_search(&hash_seq)) != NULL) { int i = 0; memset(values, 0, sizeof(Datum) * nb_columns); memset(nulls, 0, sizeof(bool) * nb_columns); values[i++] = ObjectIdGetDatum(entry->key.userid); values[i++] = ObjectIdGetDatum(entry->key.dbid); if (entry->lattnum != InvalidAttrNumber) { values[i++] = ObjectIdGetDatum(entry->lrelid); values[i++] = Int16GetDatum(entry->lattnum); } else { nulls[i++] = true; nulls[i++] = true; } values[i++] = Int32GetDatum(entry->opoid); if (entry->rattnum != InvalidAttrNumber) { values[i++] = 
ObjectIdGetDatum(entry->rrelid); values[i++] = Int16GetDatum(entry->rattnum); } else { nulls[i++] = true; nulls[i++] = true; } if (entry->qualid == 0) nulls[i++] = true; else values[i++] = Int64GetDatum(entry->qualid); if (entry->key.uniquequalid == 0) nulls[i++] = true; else values[i++] = Int64GetDatum(entry->key.uniquequalid); values[i++] = Int64GetDatum(entry->qualnodeid); values[i++] = Int64GetDatum(entry->key.uniquequalnodeid); values[i++] = Int64GetDatum(entry->occurences); values[i++] = Int64GetDatum(entry->count); values[i++] = Int64GetDatum(entry->nbfiltered); if (api_version >= PGQS_V2_0) { int j; for (j = 0; j < 2; j++) { double stddev_estim; if (j == PGQS_RATIO) /* min/max ratio are double precision */ { values[i++] = Float8GetDatum(entry->min_err_estim[j]); values[i++] = Float8GetDatum(entry->max_err_estim[j]); } else /* min/max num are bigint */ { values[i++] = Int64GetDatum(entry->min_err_estim[j]); values[i++] = Int64GetDatum(entry->max_err_estim[j]); } values[i++] = Float8GetDatum(entry->mean_err_estim[j]); if (entry->occurences > 1) stddev_estim = sqrt(entry->sum_err_estim[j] / entry->occurences); else stddev_estim = 0.0; values[i++] = Float8GetDatumFast(stddev_estim); } } if (entry->position == -1) nulls[i++] = true; else values[i++] = Int32GetDatum(entry->position); if (entry->key.queryid == 0) nulls[i++] = true; else values[i++] = Int64GetDatum(entry->key.queryid); if (entry->constvalue[0] != '\0') { if (is_allowed_role || entry->key.userid == userid) { values[i++] = CStringGetTextDatum((char *) pg_do_encoding_conversion( (unsigned char *) entry->constvalue, strlen(entry->constvalue), PG_UTF8, GetDatabaseEncoding())); } else { /* * Don't show constant text, but hint as to the reason for not * doing so */ values[i++] = CStringGetTextDatum(""); } } else nulls[i++] = true; if (entry->key.evaltype) values[i++] = CharGetDatum(entry->key.evaltype); else nulls[i++] = true; if (include_names) { if (pgqs_resolve_oids) { pgqsNames names = 
((pgqsEntryWithNames *) entry)->names; values[i++] = CStringGetTextDatum(NameStr(names.rolname)); values[i++] = CStringGetTextDatum(NameStr(names.datname)); values[i++] = CStringGetTextDatum(NameStr(names.lrelname)); values[i++] = CStringGetTextDatum(NameStr(names.lattname)); values[i++] = CStringGetTextDatum(NameStr(names.opname)); values[i++] = CStringGetTextDatum(NameStr(names.rrelname)); values[i++] = CStringGetTextDatum(NameStr(names.rattname)); } else { for (; i < nb_columns; i++) nulls[i] = true; } } Assert(i == nb_columns); tuplestore_putvalues(tupstore, tupdesc, values, nulls); } PGQS_LWL_RELEASE(pgqs->lock); MemoryContextSwitchTo(oldcontext); return (Datum) 0; } Datum pg_qualstats_example_query(PG_FUNCTION_ARGS) { #if PG_VERSION_NUM >= 110000 pgqs_queryid queryid = PG_GETARG_INT64(0); #else pgqs_queryid queryid = PG_GETARG_UINT32(0); #endif pgqsQueryStringEntry *entry; pgqsQueryStringHashKey queryKey; bool found; if ((!pgqs && !pgqs_backend) || !pgqs_hash) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("pg_qualstats must be loaded via shared_preload_libraries"))); /* don't search the hash table if track_constants isn't enabled */ if (!pgqs_track_constants) PG_RETURN_NULL(); queryKey.queryid = queryid; PGQS_LWL_ACQUIRE(pgqs->querylock, LW_SHARED); entry = hash_search_with_hash_value(pgqs_query_examples_hash, &queryKey, queryid, HASH_FIND, &found); PGQS_LWL_RELEASE(pgqs->querylock); if (found) PG_RETURN_TEXT_P(cstring_to_text(entry->querytext)); else PG_RETURN_NULL(); } Datum pg_qualstats_example_queries(PG_FUNCTION_ARGS) { ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; TupleDesc tupdesc; Tuplestorestate *tupstore; MemoryContext per_query_ctx; MemoryContext oldcontext; HASH_SEQ_STATUS hash_seq; pgqsQueryStringEntry *entry; if ((!pgqs && !pgqs_backend) || !pgqs_query_examples_hash) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("pg_qualstats must be loaded via shared_preload_libraries"))); 
/* check to see if caller supports us returning a tuplestore */ if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("set-valued function called in context that cannot accept a set"))); if (!(rsinfo->allowedModes & SFRM_Materialize)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("materialize mode required, but it is not " \ "allowed in this context"))); per_query_ctx = rsinfo->econtext->ecxt_per_query_memory; oldcontext = MemoryContextSwitchTo(per_query_ctx); if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); tupstore = tuplestore_begin_heap(true, false, work_mem); rsinfo->returnMode = SFRM_Materialize; rsinfo->setResult = tupstore; rsinfo->setDesc = tupdesc; MemoryContextSwitchTo(oldcontext); /* don't need to scan the hash table if track_constants isn't enabled */ if (!pgqs_track_constants) return (Datum) 0; PGQS_LWL_ACQUIRE(pgqs->querylock, LW_SHARED); hash_seq_init(&hash_seq, pgqs_query_examples_hash); while ((entry = hash_seq_search(&hash_seq)) != NULL) { Datum values[2]; bool nulls[2]; int64 queryid = entry->key.queryid; memset(values, 0, sizeof(values)); memset(nulls, 0, sizeof(nulls)); values[0] = Int64GetDatumFast(queryid); values[1] = CStringGetTextDatum(entry->querytext); tuplestore_putvalues(tupstore, tupdesc, values, nulls); } PGQS_LWL_RELEASE(pgqs->querylock); return (Datum) 0; } /* * Calculate hash value for a key */ static uint32 pgqs_hash_fn(const void *key, Size keysize) { const pgqsHashKey *k = (const pgqsHashKey *) key; return hash_uint32((uint32) k->userid) ^ hash_uint32((uint32) k->dbid) ^ hash_uint32((uint32) k->queryid) ^ hash_uint32((uint32) k->uniquequalnodeid) ^ hash_uint32((uint32) k->uniquequalid) ^ hash_uint32((uint32) k->evaltype); } static void pgqs_set_planstates(PlanState *planstate, pgqsWalkerContext *context) { context->outer_tlist = NIL; context->inner_tlist = NIL; context->index_tlist 
= NIL; context->outer_planstate = NULL; context->inner_planstate = NULL; context->planstate = planstate; if (IsA(planstate, AppendState)) { AppendState * appendstate = (AppendState *) planstate; if (appendstate->as_nplans > 0) context->outer_planstate = appendstate->appendplans[0]; } else if (IsA(planstate, MergeAppendState)) { MergeAppendState * mergeappendstate = (MergeAppendState *) planstate; if (mergeappendstate->ms_nplans > 0) context->outer_planstate = mergeappendstate->mergeplans[0]; } #if PG_VERSION_NUM < 140000 else if (IsA(planstate, ModifyTableState)) { context->outer_planstate = ((ModifyTableState *) planstate)->mt_plans[0]; } #endif else context->outer_planstate = outerPlanState(planstate); if (context->outer_planstate) context->outer_tlist = context->outer_planstate->plan->targetlist; else context->outer_tlist = NIL; if (IsA(planstate, SubqueryScanState)) context->inner_planstate = ((SubqueryScanState *) planstate)->subplan; else if (IsA(planstate, CteScanState)) context->inner_planstate = ((CteScanState *) planstate)->cteplanstate; else context->inner_planstate = innerPlanState(planstate); if (context->inner_planstate) context->inner_tlist = context->inner_planstate->plan->targetlist; else context->inner_tlist = NIL; /* index_tlist is set only if it's an IndexOnlyScan */ if (IsA(planstate->plan, IndexOnlyScan)) context->index_tlist = ((IndexOnlyScan *) planstate->plan)->indextlist; #if PG_VERSION_NUM >= 90500 else if (IsA(planstate->plan, ForeignScan)) context->index_tlist = ((ForeignScan *) planstate->plan)->fdw_scan_tlist; else if (IsA(planstate->plan, CustomScan)) context->index_tlist = ((CustomScan *) planstate->plan)->custom_scan_tlist; #endif else context->index_tlist = NIL; } static Expr * pgqs_resolve_var(Var *var, pgqsWalkerContext *context) { List *tlist = NULL; PlanState *planstate = context->planstate; pgqs_set_planstates(context->planstate, context); switch (var->varno) { case INNER_VAR: tlist = context->inner_tlist; break; case 
OUTER_VAR: tlist = context->outer_tlist; break; case INDEX_VAR: tlist = context->index_tlist; break; default: return (Expr *) var; } if (tlist != NULL) { TargetEntry *entry = get_tle_by_resno(tlist, var->varattno); if (entry != NULL) { Var *newvar = (Var *) (entry->expr); if (var->varno == OUTER_VAR) pgqs_set_planstates(context->outer_planstate, context); if (var->varno == INNER_VAR) pgqs_set_planstates(context->inner_planstate, context); var = (Var *) pgqs_resolve_var(newvar, context); } } Assert(!(IsA(var, Var) && IS_SPECIAL_VARNO(var->varno))); /* If the result is something OTHER than a var, replace it by a constexpr */ if (!IsA(var, Var)) { Const *consttext; consttext = (Const *) makeConst(TEXTOID, -1, -1, -1, CStringGetTextDatum(nodeToString(var)), false, false); var = (Var *) consttext; } pgqs_set_planstates(planstate, context); return (Expr *) var; } /* * Estimate shared memory space needed. */ static Size pgqs_memsize(void) { Size size; size = MAXALIGN(sizeof(pgqsSharedState)); if (pgqs_resolve_oids) size = add_size(size, hash_estimate_size(pgqs_max, sizeof(pgqsEntryWithNames))); else size = add_size(size, hash_estimate_size(pgqs_max, sizeof(pgqsEntry))); if (pgqs_track_constants) { /* * In that case, we also need an additional struct for storing * non-normalized queries. */ size = add_size(size, hash_estimate_size(pgqs_max, sizeof(pgqsQueryStringEntry) + pgqs_query_size * sizeof(char))); } #if PG_VERSION_NUM >= 90600 size = add_size(size, MAXALIGN(pgqs_sampled_array_size())); #endif return size; } #if PG_VERSION_NUM >= 90600 static Size pgqs_sampled_array_size(void) { int _maxbackends; #if PG_VERSION_NUM >= 150000 Assert(MaxBackends > 0); _maxbackends = MaxBackends; #else int32 _autovac_max_workers; int32 _max_wal_senders; const char *guc_string; /* * autovacuum_max_workers and max_wal_senders aren't declared as * PGDLLIMPORT in pg15- versions, so retrieve them using GetConfigOption to * allow compilation on Windows. 
*/ guc_string = GetConfigOption("autovacuum_max_workers", false, true); _autovac_max_workers = pg_atoi(guc_string, 4, 0); Assert(_autovac_max_workers >= 1 && _autovac_max_workers <= MAX_BACKENDS); guc_string = GetConfigOption("max_wal_senders", false, true); _max_wal_senders = pg_atoi(guc_string, 4, 0); Assert(_max_wal_senders >= 0 && _max_wal_senders <= MAX_BACKENDS); /* * Parallel workers need to be sampled if their original query is also * sampled. We store in shared mem the sample state for each query, * identified by their BackendId. If need room for all possible backends, * plus autovacuum launcher and workers, plus bg workers. */ _maxbackends = MaxConnections + _autovac_max_workers + 1 + max_worker_processes #if PG_VERSION_NUM >= 120000 /* * Starting with pg12, max_wal_senders isn't part * of max_connections anymore */ + _max_wal_senders #endif /* pg12+ */ + 1; #endif /* pg15- */ /* We need an extra value since BackendId numerotationn starts at 1. */ return (sizeof(bool) * (_maxbackends + 1)); } #endif static uint32 hashExpr(Expr *expr, pgqsWalkerContext *context, bool include_const) { StringInfo buffer = makeStringInfo(); exprRepr(expr, buffer, context, include_const); return hash_any((unsigned char *) buffer->data, buffer->len); } static void exprRepr(Expr *expr, StringInfo buffer, pgqsWalkerContext *context, bool include_const) { ListCell *lc; if (expr == NULL) return; appendStringInfo(buffer, "%d-", expr->type); if (IsA(expr, Var)) expr = pgqs_resolve_var((Var *) expr, context); switch (expr->type) { case T_List: foreach(lc, (List *) expr) exprRepr((Expr *) lfirst(lc), buffer, context, include_const); break; case T_OpExpr: { OpExpr *opexpr; opexpr = pgqs_get_canonical_opexpr((OpExpr *) expr, NULL); appendStringInfo(buffer, "%d", opexpr->opno); exprRepr((Expr *) opexpr->args, buffer, context, include_const); break; } case T_Var: { Var *var = (Var *) expr; RangeTblEntry *rte = list_nth(context->rtable, var->varno - 1); if (rte->rtekind == RTE_RELATION) 
appendStringInfo(buffer, "%d;%d", rte->relid, var->varattno); else appendStringInfo(buffer, "NORTE%d;%d", var->varno, var->varattno); } break; case T_BoolExpr: appendStringInfo(buffer, "%d", ((BoolExpr *) expr)->boolop); exprRepr((Expr *) ((BoolExpr *) expr)->args, buffer, context, include_const); break; case T_BooleanTest: if (include_const) appendStringInfo(buffer, "%d", ((BooleanTest *) expr)->booltesttype); exprRepr((Expr *) ((BooleanTest *) expr)->arg, buffer, context, include_const); break; case T_Const: if (include_const) get_const_expr((Const *) expr, buffer); else appendStringInfoChar(buffer, '?'); break; case T_CoerceViaIO: exprRepr((Expr *) ((CoerceViaIO *) expr)->arg, buffer, context, include_const); appendStringInfo(buffer, "|%d", ((CoerceViaIO *) expr)->resulttype); break; case T_FuncExpr: appendStringInfo(buffer, "|%d(", ((FuncExpr *) expr)->funcid); exprRepr((Expr *) ((FuncExpr *) expr)->args, buffer, context, include_const); appendStringInfoString(buffer, ")"); break; case T_MinMaxExpr: appendStringInfo(buffer, "|minmax%d(", ((MinMaxExpr *) expr)->op); exprRepr((Expr *) ((MinMaxExpr *) expr)->args, buffer, context, include_const); appendStringInfoString(buffer, ")"); break; default: appendStringInfoString(buffer, nodeToString(expr)); } } #if PG_VERSION_NUM < 90500 static uint32 pgqs_uint32_hashfn(const void *key, Size keysize) { return ((pgqsQueryStringHashKey *) key)->queryid; } #endif pg_qualstats-2.1.1/pg_qualstats.control000066400000000000000000000002221467511452200203360ustar00rootroot00000000000000comment = 'An extension collecting statistics about quals' default_version = '2.1.1' module_pathname = '$libdir/pg_qualstats' relocatable = false 
pg_qualstats-2.1.1/test/000077500000000000000000000000001467511452200152105ustar00rootroot00000000000000pg_qualstats-2.1.1/test/sql/000077500000000000000000000000001467511452200160075ustar00rootroot00000000000000pg_qualstats-2.1.1/test/sql/pg_qualstats.sql000066400000000000000000000065611467511452200212470ustar00rootroot00000000000000CREATE SCHEMA "PGQS"; CREATE EXTENSION pg_qualstats WITH SCHEMA "PGQS"; -- Make sure that installcheck won't find previous data SELECT "PGQS".pg_qualstats_reset(); -- Make sure sure we'll see at least one qual SET pg_qualstats.sample_rate = 1; CREATE TABLE pgqs AS SELECT id, 'a'::text val FROM generate_series(1, 100) id; SELECT COUNT(*) FROM pgqs WHERE id = 1; SELECT lrelid::regclass::text, lattnum, occurences, execution_count, nbfiltered, constvalue, eval_type FROM "PGQS".pg_qualstats; SELECT COUNT(*) > 0 FROM "PGQS".pg_qualstats; SELECT COUNT(*) > 0 FROM "PGQS".pg_qualstats(); SELECT COUNT(*) > 0 FROM "PGQS".pg_qualstats_example_queries(); SELECT "PGQS".pg_qualstats_reset(); SELECT COUNT(*) FROM "PGQS".pg_qualstats(); -- OpExpr sanity checks -- subquery_var operator const, shouldn't be tracked SELECT * FROM (SELECT * FROM pgqs LIMIT 0) pgqs WHERE pgqs.id = 0; SELECT COUNT(*) FROM "PGQS".pg_qualstats(); -- const non_commutable_operator var, should be tracked, var found on RHS SELECT * FROM pgqs WHERE 'meh' ~ val; SELECT lrelid::regclass, lattnum, rrelid::regclass, rattnum FROM "PGQS".pg_qualstats(); SELECT "PGQS".pg_qualstats_reset(); -- opexpr operator var and commuted, shouldn't be tracked SELECT * FROM pgqs WHERE id % 2 = 3; SELECT * FROM pgqs WHERE 3 = id % 2; SELECT COUNT(*) FROM "PGQS".pg_qualstats(); -- same query with handled commuted qual, which should be found as identical SELECT * FROM pgqs WHERE id = 0; SELECT * FROM pgqs WHERE 0 = id; SELECT lrelid::regclass, lattnum, rrelid::regclass, rattnum, sum(occurences) FROM "PGQS".pg_qualstats() GROUP by 1, 2, 3, 4; SELECT COUNT(DISTINCT qualnodeid) FROM "PGQS".pg_qualstats(); -- 
(unique)qualid behavior SELECT "PGQS".pg_qualstats_reset(); -- There should be one group of 2 AND-ed quals, and 1 qual alone SELECT COUNT(*) FROM pgqs WHERE (id = 1) OR (id > 10 AND id < 20); SELECT CASE WHEN qualid IS NULL THEN 'OR-ed' ELSE 'AND-ed' END kind, COUNT(*) FROM "PGQS".pg_qualstats() GROUP BY 1 ORDER BY 2 DESC; ---------------- -- index advisor ---------------- -- check that empty arrays are returned rather than NULL values SELECT "PGQS".pg_qualstats_reset(); SELECT * FROM "PGQS".pg_qualstats_index_advisor(50); -- Test some naive scenario CREATE TABLE adv (id1 integer, id2 integer, id3 integer, val text); INSERT INTO adv SELECT i, i, i, 'line ' || i from generate_series(1, 1000) i; SELECT "PGQS".pg_qualstats_reset(); SELECT * FROM adv WHERE id1 < 0; SELECT count(*) FROM adv WHERE id1 < 500; SELECT * FROM adv WHERE val = 'meh'; SELECT * FROM adv WHERE id1 = 0 and val = 'meh'; SELECT * FROM adv WHERE id1 = 1 and val = 'meh'; SELECT * FROM adv WHERE id1 = 1 and id2 = 2 AND val = 'meh'; SELECT * FROM adv WHERE id1 = 6 and id2 = 6 AND id3 = 6 AND val = 'meh'; SELECT COUNT(*) FROM pgqs WHERE id = 1; -- non optimisable statements SELECT * FROM adv WHERE val ILIKE 'moh'; SELECT count(*) FROM adv WHERE val ILIKE 'moh'; SELECT * FROM adv WHERE val LIKE 'moh'; -- check the results SELECT v->'ddl' AS v FROM json_array_elements( "PGQS".pg_qualstats_index_advisor(50)->'indexes') v ORDER BY v::text COLLATE "C"; SELECT v->'qual' AS v FROM json_array_elements( "PGQS".pg_qualstats_index_advisor(50)->'unoptimised') v ORDER BY v::text COLLATE "C"; -- check quals on removed table DROP TABLE pgqs; SELECT v->'ddl' AS v FROM json_array_elements( "PGQS".pg_qualstats_index_advisor(50)->'indexes') v ORDER BY v::text COLLATE "C";