pax_global_header00006660000000000000000000000064145030620340014507gustar00rootroot0000000000000052 comment=34af21412c8025532a83383b4a9f2965ea696e6e pgfincore-1.3.1/000077500000000000000000000000001450306203400134655ustar00rootroot00000000000000pgfincore-1.3.1/.github/000077500000000000000000000000001450306203400150255ustar00rootroot00000000000000pgfincore-1.3.1/.github/workflows/000077500000000000000000000000001450306203400170625ustar00rootroot00000000000000pgfincore-1.3.1/.github/workflows/main.yml000066400000000000000000000011631450306203400205320ustar00rootroot00000000000000name: CI on: push: branches: ['*'] pull_request: branches: ['*'] jobs: build: strategy: matrix: pg: - 16 - 15 - 14 - 13 - 12 - 11 - 10 - 9.6 - 9.5 - 9.4 name: 🐘 PostgreSQL ${{ matrix.pg }} runs-on: ubuntu-latest container: pgxn/pgxn-tools steps: - name: Start PostgreSQL ${{ matrix.pg }} run: pg-start ${{ matrix.pg }} - name: Check out the repo uses: actions/checkout@v4 - name: Test on PostgreSQL ${{ matrix.pg }} run: pg-build-testpgfincore-1.3.1/.gitignore000066400000000000000000000001271450306203400154550ustar00rootroot00000000000000.pc debian/files build-pgfincore-* debian/postgresql-* results/ *.so pgfincore*.tar.gz pgfincore-1.3.1/AUTHORS000066400000000000000000000005251450306203400145370ustar00rootroot00000000000000pgfincore is written by: * Cédric Villemain I take pg_relation_size code as a model, I look at the C interesting part from fincore (http://net.doit.wisc.edu/~plonka/fincore/), and I follow the great idea from http://www.kennygorman.com/wordpress/?p=246. In short, thank you Kenny Gorman, thank you Dave Plonka ! pgfincore-1.3.1/COPYRIGHT000066400000000000000000000030501450306203400147560ustar00rootroot00000000000000/* * Copyright (c) 2009-2016 Cédric Villemain * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY CONTRIBUTORS ``AS IS'' AND ANY EXPRESS * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL CONTRIBUTORS BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ pgfincore-1.3.1/ChangeLog000066400000000000000000000102041450306203400152340ustar00rootroot0000000000000021/09/2023 Cédric Villemain * 1.3.1 - drop support for upgrading from "unpackaged" 21/09/2023 Cédric Villemain * 1.3 - added support for PostgreSQL 16 - drop support for PostgreSQL < 9.4 2019-10-29 Cédric Villemain * 1.2.2 - Fix bad errno usage 22/09/2017 Cédric Villemain * 1.2.1 - Fix check on NULL input for drawer function 16/09/2016 Cédric Villemain * 1.2 - Prepared fincore syscall usage - Added 2 columns in pgfincore() with dirty status when available - Support PostgreSQL 9.6 - Added a drawer function 12/10/2013 Cédric Villemain * 1.1.2 - Fix README filename for PGXS - Update to PostgreSQL 9.3 - Fix faillure on NULL input (pgfadvise_loader) - Several fixes and layout changes 06/21/2012 Cédric Villemain * 1.1.2 - Change the open() call to use AllocateFile, and FreeFile 12/06/2011 Cédric Villemain * 1.1.1 - Fix Makefile again, as well as debian scripts (VPATH) - Add checks (make installcheck) - Improve .gitignore - Add a debian/watch file tracking pgfoundry release - Add regression files to VPATH build 09/07/2011 Cédric Villemain * 1.1.0 - Fix Makefile and remove the dir sql/ (useless and error prone) - Fix the printf of int64 by casting to long long int (i386 and adm64 behave differently with int64) - Updated to work with PostgreSQL 8.3 (TAKATSUKA Haruka) - Improve debian packaging (Dimitri Fontaine) - Add support for *BSD kernels - Remove mention of PGXS in the README 07/28/2011 Cédric Villemain * 1.0.0 - Output varbit containing vector information with pgfincore*() - Add Debian packaging (Dimitri Fontaine) - Update to work with PostgreSQL >= 9.1 (Jeff Janes) - Add total number of pages of memory with pgsysconf() - Add function pgsysconf_pretty() - Major rewrite of the functions - pgfadvise*() to handle simple posix_fadvise call - pgfadvise_loader() to restore file status (pages in/out cache) - pgfincore*() to handle mincore usage - pgsysconf*() to handle 
sysconf information - Use get_call_result_type() to build the tuple descriptor (suggested by RhodiumToad on IRC) - Remove limitation of usage on temp tables - Improve 9.1 installation (Extension) 04/30/2010 Cédric Villemain * 0.4.1 - use AllocateFile instead of fopen - call PG_GETARG* earlier - remove useless global counter - add error handler in pgfadv_snapshot() - errno to catch the last segment - improve Readme - some minor fix and beautify 01/05/2010 Cédric Villemain * 0.4.0 - fix test is not temp table - add posix_fadvise_willneed flag - add posix_fadvise_dontneed flag - add posix_fadvise_normal flag - add posix_fadvise_sequential flag - add posix_fadvise_random flag - rewrite main SRF - improve output (more informations) - fix copyright - add pgsysconf() - add pgmincore_snapshot to write mincore state in a file - add pgfadv_willneed_snapshot to read mincore state from file 10/26/2009 Cédric Villemain * 0.3.2 - fix fctx init 10/26/2009 Cédric Villemain * 0.3.1 - fix Makefile without PGXS - fix install doc in README 08/12/2009 Cédric Villemain * 0.3 - pgfincore now return a set of record - relname, relpath, block_disk, block_mem, group_mem - this version can only be build againt a postgresql > 8.3 /!\ 08/10/2009 Cédric Villemain * 0.2.1 - fix munmap call error 08/08/2009 Cédric Villemain * 0.2 - add support for 8.4 - fix mmap error when file is empty 06/29/2009 Cédric Villemain * 0.1.1 - cleaning and fixing 06/27/2009 Cédric Villemain * 0.1 - functions are working, basicaly. 
pgfincore-1.3.1/Makefile000066400000000000000000000010701450306203400151230ustar00rootroot00000000000000EXTENSION = pgfincore EXTVERSION = 1.3.1 MODULES = $(EXTENSION) MODULEDIR = $(EXTENSION) DOCS = README.md DATA = $(EXTENSION)--1.2--1.3.1.sql \ $(EXTENSION)--$(EXTVERSION).sql REGRESS = $(EXTENSION) PG_CONFIG = pg_config PGXS := $(shell $(PG_CONFIG) --pgxs) include $(PGXS) dist: git archive --prefix=$(EXTENSION)-$(EXTVERSION)/ -o ../$(EXTENSION)_$(EXTVERSION).orig.tar.gz HEAD deb: make clean pg_buildext updatecontrol make -f debian/rules debian/control dh clean make dist dpkg-buildpackage -us -uc pgfincore-1.3.1/README.md000066400000000000000000000343211450306203400147470ustar00rootroot00000000000000[![CI](https://github.com/klando/pgfincore/actions/workflows/main.yml/badge.svg?branch=master)](https://github.com/klando/pgfincore/actions/workflows/main.yml) # PgFincore -------------------------------------------------------------- A set of functions to manage pages in memory from PostgreSQL -------------------------------------------------------------- A set of functions to handle low-level management of relations using mincore to explore cache memory. ## DESCRIPTION With PostgreSQL, each Table or Index is splitted in segments of (usually) 1GB, and each segment is splitted in pages in memory then in blocks for the filesystem. Those functions let you know which and how many disk block from a relation are in the page cache of the operating system. It can provide the result as a VarBit and can be stored in a table. Then using this table, it is possible to restore the page cache state for each block of the relation, even in another server, thanks to Streaming Replication. Other functions are used to set a *POSIX_FADVISE* flag on the entire relation (each segment). The more usefull are probably *WILLNEED* and *DONTNEED* which push and pop blocks of each segments of a relation from page cache, respectively. 
Each functions are call with at least a table name or an index name (or oid) as a parameter and walk each segment of the relation. ## DOWNLOAD You can grab the latest code with git: git clone git://git.postgresql.org/git/pgfincore.git or git://github.com/klando/pgfincore.git And the project is on pgfoundry : http://pgfoundry.org/projects/pgfincore ## INSTALL From source code: make clean make su make install For PostgreSQL >= 9.1, log in your database and: mydb=# CREATE EXTENSION pgfincore; For other release, create the functions from the sql script (it should be in your contrib directory): psql mydb -f pgfincore.sql PgFincore is also shipped with Debian scripts to build your own package: aptitude install debhelper postgresql-server-dev-all postgresql-server-dev-9.1 # or postgresql-server-dev-8.4|postgresql-server-dev-9.0 make deb dpkg -i ../postgresql-9.1-pgfincore_1.1.1-1_amd64.deb PgFincore is packaged for *RPM* at http://yum.postgresql.org/ PgFincore is packaged for *debian* at http://pgapt.debian.net/ ## EXAMPLES Here are some examples of usage. If you want more details go to Documentation_ ### Get current state of a relation May be useful: cedric=# select * from pgfincore('pgbench_accounts'); relpath | segment | os_page_size | rel_os_pages | pages_mem | group_mem | os_pages_free | databit | pages_dirty | group_dirty --------------------+---------+--------------+--------------+-----------+-----------+---------------+---------+-------------+------------- base/11874/16447 | 0 | 4096 | 262144 | 262144 | 1 | 81016 | | 0 | 0 base/11874/16447.1 | 1 | 4096 | 65726 | 65726 | 1 | 81016 | | 0 | 0 (2 rows) Time: 31.563 ms ### Load a table or an index in OS Page Buffer You may want to try to keep a table or an index into the OS Page Cache, or preload a table before your well know big query is executed (reducing the query time). 
To do so, just execute the following query: cedric=# select * from pgfadvise_willneed('pgbench_accounts'); relpath | os_page_size | rel_os_pages | os_pages_free --------------------+--------------+--------------+--------------- base/11874/16447 | 4096 | 262144 | 169138 base/11874/16447.1 | 4096 | 65726 | 103352 (2 rows) Time: 4462,936 ms * The column *os_page_size* report that page size is 4KB. * The column *rel_os_pages* is the number of pages of the specified file. * The column *os_pages_free* is the number of free pages in memory (for caching). ### Snapshot and Restore the OS Page Buffer state of a table or an index (or more) You may want to restore a table or an index into the OS Page Cache as it was while you did the snapshot. For example if you have to reboot your server, then when PostgreSQL start up the first queries might be slower because neither PostgreSQL or the OS have pages in their respective cache about the relations involved in those first queries. Executing a snapshot and a restore is very simple: -- Snapshot cedric=# create table pgfincore_snapshot as cedric-# select 'pgbench_accounts'::text as relname,*,now() as date_snapshot cedric-# from pgfincore('pgbench_accounts',true); -- Restore cedric=# select * from pgfadvise_loader('pgbench_accounts', 0, true, true, (select databit from pgfincore_snapshot where relname='pgbench_accounts' and segment = 0)); relpath | os_page_size | os_pages_free | pages_loaded | pages_unloaded ------------------+--------------+---------------+--------------+---------------- base/11874/16447 | 4096 | 80867 | 262144 | 0 (1 row) Time: 35.349 ms * The column *pages_loaded* report how many pages have been read to memory (they may have already been in memoy) * The column *pages_unloaded* report how many pages have been removed from memory (they may not have already been in memoy); ## SYNOPSIS pgsysconf(OUT os_page_size bigint, OUT os_pages_free bigint, OUT os_total_pages bigint) RETURNS record pgsysconf_pretty(OUT 
os_page_size text, OUT os_pages_free text, OUT os_total_pages text) RETURNS record pgfadvise(IN relname regclass, IN fork text, IN action int, OUT relpath text, OUT os_page_size bigint, OUT rel_os_pages bigint, OUT os_pages_free bigint) RETURNS setof record pgfadvise_willneed(IN relname regclass, OUT relpath text, OUT os_page_size bigint, OUT rel_os_pages bigint, OUT os_pages_free bigint) RETURNS setof record pgfadvise_dontneed(IN relname regclass, OUT relpath text, OUT os_page_size bigint, OUT rel_os_pages bigint, OUT os_pages_free bigint) RETURNS setof record pgfadvise_normal(IN relname regclass, OUT relpath text, OUT os_page_size bigint, OUT rel_os_pages bigint, OUT os_pages_free bigint) RETURNS setof record pgfadvise_sequential(IN relname regclass, OUT relpath text, OUT os_page_size bigint, OUT rel_os_pages bigint, OUT os_pages_free bigint) RETURNS setof record pgfadvise_random(IN relname regclass, OUT relpath text, OUT os_page_size bigint, OUT rel_os_pages bigint, OUT os_pages_free bigint) RETURNS setof record pgfadvise_loader(IN relname regclass, IN fork text, IN segment int, IN load bool, IN unload bool, IN databit varbit, OUT relpath text, OUT os_page_size bigint, OUT os_pages_free bigint, OUT pages_loaded bigint, OUT pages_unloaded bigint) RETURNS setof record pgfadvise_loader(IN relname regclass, IN segment int, IN load bool, IN unload bool, IN databit varbit, OUT relpath text, OUT os_page_size bigint, OUT os_pages_free bigint, OUT pages_loaded bigint, OUT pages_unloaded bigint) RETURNS setof record pgfincore(IN relname regclass, IN fork text, IN getdatabit bool, OUT relpath text, OUT segment int, OUT os_page_size bigint, OUT rel_os_pages bigint, OUT pages_mem bigint, OUT group_mem bigint, OUT os_pages_free bigint, OUT databit varbit, OUT pages_dirty bigint, OUT group_dirty bigint) RETURNS setof record pgfincore(IN relname regclass, IN getdatabit bool, OUT relpath text, OUT segment int, OUT os_page_size bigint, OUT rel_os_pages bigint, OUT pages_mem 
bigint, OUT group_mem bigint, OUT os_pages_free bigint, OUT databit varbit, OUT pages_dirty bigint, OUT group_dirty bigint) RETURNS setof record pgfincore(IN relname regclass, OUT relpath text, OUT segment int, OUT os_page_size bigint, OUT rel_os_pages bigint, OUT pages_mem bigint, OUT group_mem bigint, OUT os_pages_free bigint, OUT databit varbit, OUT pages_dirty bigint, OUT group_dirty bigint) RETURNS setof record ## DOCUMENTATION ### pgsysconf This function output size of OS blocks, number of free page in the OS Page Buffer. cedric=# select * from pgsysconf(); os_page_size | os_pages_free | os_total_pages --------------+---------------+---------------- 4096 | 80431 | 4094174 ### pgsysconf_pretty The same as above, but with pretty output. cedric=# select * from pgsysconf_pretty(); os_page_size | os_pages_free | os_total_pages --------------+---------------+---------------- 4096 bytes | 314 MB | 16 GB ### pgfadvise_WILLNEED This function set *WILLNEED* flag on the current relation. It means that the Operating Sytem will try to load as much pages as possible of the relation. Main idea is to preload files on server startup, perhaps using cache hit/miss ratio or most required relations/indexes. cedric=# select * from pgfadvise_willneed('pgbench_accounts'); relpath | os_page_size | rel_os_pages | os_pages_free --------------------+--------------+--------------+--------------- base/11874/16447 | 4096 | 262144 | 80650 base/11874/16447.1 | 4096 | 65726 | 80650 ### pgfadvise_DONTNEED This function set *DONTNEED* flag on the current relation. It means that the Operating System will first unload pages of the file if it need to free some memory. 
Main idea is to unload files when they are not usefull anymore (instead of perhaps more interesting pages) cedric=# select * from pgfadvise_dontneed('pgbench_accounts'); relpath | os_page_size | rel_os_pages | os_pages_free --------------------+--------------+--------------+--------------- base/11874/16447 | 4096 | 262144 | 342071 base/11874/16447.1 | 4096 | 65726 | 408103 ### pgfadvise_NORMAL This function set *NORMAL* flag on the current relation. ### pgfadvise_SEQUENTIAL This function set *SEQUENTIAL* flag on the current relation. ### pgfadvise_RANDOM This function set *RANDOM* flag on the current relation. ### pgfadvise_loader This function allow to interact directly with the Page Cache. It can be used to load and/or unload page from memory based on a varbit representing the map of the pages to load/unload accordingly. Work with relation pgbench_accounts, segment 0, arbitrary varbit map: -- Loading and Unloading cedric=# select * from pgfadvise_loader('pgbench_accounts', 0, true, true, B'111000'); relpath | os_page_size | os_pages_free | pages_loaded | pages_unloaded ------------------+--------------+---------------+--------------+---------------- base/11874/16447 | 4096 | 408376 | 3 | 3 -- Loading cedric=# select * from pgfadvise_loader('pgbench_accounts', 0, true, false, B'111000'); relpath | os_page_size | os_pages_free | pages_loaded | pages_unloaded ------------------+--------------+---------------+--------------+---------------- base/11874/16447 | 4096 | 408370 | 3 | 0 -- Unloading cedric=# select * from pgfadvise_loader('pgbench_accounts', 0, false, true, B'111000'); relpath | os_page_size | os_pages_free | pages_loaded | pages_unloaded ------------------+--------------+---------------+--------------+---------------- base/11874/16447 | 4096 | 408370 | 0 | 3 ### pgfincore This function provide information about the file system cache (page cache). 
cedric=# select * from pgfincore('pgbench_accounts'); relpath | segment | os_page_size | rel_os_pages | pages_mem | group_mem | os_pages_free | databit | pages_dirty | group_dirty --------------------+---------+--------------+--------------+-----------+-----------+---------------+---------+-------------+------------- base/11874/16447 | 0 | 4096 | 262144 | 3 | 1 | 408444 | | 0 | 0 base/11874/16447.1 | 1 | 4096 | 65726 | 0 | 0 | 408444 | | 0 | 0 For the specified relation it returns: * relpath : the relation path * segment : the segment number analyzed * os_page_size : the size of one page * rel_os_pages : the total number of pages of the relation * pages_mem : the total number of relation's pages in page cache. (not the shared buffers from PostgreSQL but the OS cache) * group_mem : the number of groups of adjacent pages_mem * os_page_free : the number of free page in the OS page cache * databit : the varbit map of the file, because of its size it is useless to output Use pgfincore('pgbench_accounts',true) to activate it. * pages_dirty : if HAVE_FINCORE constant is define and the platorm provides the relevant information, like pages_mem but for dirtied pages * group_dirty : if HAVE_FINCORE constant is define and the platorm provides the relevant information, like group_mem but for dirtied pages ## DEBUG You can debug the PgFincore with the following error level: *DEBUG1* and *DEBUG5*. For example: set client_min_messages TO debug1; -- debug5 is only usefull to trace each block ## REQUIREMENTS * PgFincore needs mincore() or fincore() and POSIX_FADVISE ## LIMITATIONS * PgFincore has a limited mode when POSIX_FADVISE is not provided by the platform. * PgFincore needs PostgreSQL >= 8.3 * PgFincore does not work on windows. 
## SEE ALSO Data Bene, PostgreSQL Expertise, Technical Support and Assistance, Trainings: https://www.data-bene.io pgfincore-1.3.1/TODO000066400000000000000000000002651450306203400141600ustar00rootroot00000000000000* [sql] average contigous block or stats like that (what part of the file is in cache) * [code] split mmaping in shorter segment (say 64Mb) per sugestion from Andres Freund * graph pgfincore-1.3.1/debian/000077500000000000000000000000001450306203400147075ustar00rootroot00000000000000pgfincore-1.3.1/debian/changelog000066400000000000000000000065271450306203400165730ustar00rootroot00000000000000pgfincore (1.2.4-2) unstable; urgency=medium * Upload for PostgreSQL 15. -- Christoph Berg Fri, 21 Oct 2022 11:59:48 +0200 pgfincore (1.2.4-1) unstable; urgency=medium * New version with PG 15 support. -- Christoph Berg Wed, 28 Sep 2022 14:13:32 +0200 pgfincore (1.2.3-1) unstable; urgency=medium * Fix GitHub watch file. -- Christoph Berg Tue, 11 Jan 2022 12:37:15 +0100 pgfincore (1.2.2-3) unstable; urgency=medium * Upload for PostgreSQL 14. -- Christoph Berg Wed, 03 Nov 2021 14:14:12 +0100 pgfincore (1.2.2-2) unstable; urgency=medium * Upload for PostgreSQL 13. * Use dh --with pgxs. * R³: no. * DH 13. * debian/tests: Use 'make' instead of postgresql-server-dev-all. -- Christoph Berg Mon, 19 Oct 2020 12:39:15 +0200 pgfincore (1.2.2-1) unstable; urgency=medium * Upload for PostgreSQL 12. -- Christoph Berg Tue, 29 Oct 2019 14:57:19 +0100 pgfincore (1.2.1-2) unstable; urgency=medium * Upload for PostgreSQL 11. * Update PostgreSQL team address. -- Christoph Berg Fri, 12 Oct 2018 13:33:10 +0200 pgfincore (1.2.1-1) unstable; urgency=medium * Team upload for PostgreSQL 10 support. * New upstream version. * debian/tests/control: Drop needs-root. -- Christoph Berg Fri, 22 Sep 2017 09:48:18 +0200 pgfincore (1.2-2) unstable; urgency=medium * Upload with 9.6 support. * Update watch file to ignore debian/ tags on github. * Bump S-V and clean up results/. 
-- Christoph Berg Sat, 24 Sep 2016 13:16:54 +0200 pgfincore (1.2-1) unstable; urgency=medium * New upstream release 1.2. -- Cédric Villemain Thu, 15 Sep 2016 13:13:14 +0200 pgfincore (1.1.2-4) unstable; urgency=medium * Build for PostgreSQL 9.5. (Closes: #811134) * Bump Standards-Version to 3.9.6 (no changes needed). * debian/control.in: Drop obsolete XS-Testsuite: field. -- Martin Pitt Sat, 16 Jan 2016 11:50:19 +0100 pgfincore (1.1.2-3) unstable; urgency=medium * Upload to unstable for 9.4. -- Christoph Berg Sun, 27 Jul 2014 11:16:09 +0200 pgfincore (1.1.2-2) experimental; urgency=medium * Use "all" in debian/pgversions. * B-D on pg-common 158 to build against 9.3 and 9.4. * Use pg_buildext installcheck. * Set team as maintainer. -- Christoph Berg Sun, 06 Jul 2014 18:32:22 +0200 pgfincore (1.1.2-1) unstable; urgency=low * New upstream release with PostgreSQL 9.3 support. (Closes: #725570) * Add autopkgtest support. * Add watch file looking for releases on github. -- Christoph Berg Tue, 10 Dec 2013 15:35:40 +0100 pgfincore (1.1.1-1) unstable; urgency=low * New upstream release -- Cédric Villemain Fri, 02 Dec 2011 22:48:27 +0100 pgfincore (1.1-1) unstable; urgency=low * New upstream release * Clean packaging for 9.1 (Closes: #639460) -- Dimitri Fontaine Mon, 05 Sep 2011 12:56:05 +0200 pgfincore (1.0-1) unstable; urgency=low * New upstream release -- Dimitri Fontaine Wed, 27 Jul 2011 16:21:48 +0200 pgfincore (0.4-1) unstable; urgency=low * Initial packaging -- Dimitri Fontaine Tue, 30 Nov 2010 15:27:25 +0100 pgfincore-1.3.1/debian/control000066400000000000000000000017301450306203400163130ustar00rootroot00000000000000Source: pgfincore Section: database Priority: optional Maintainer: Debian PostgreSQL Maintainers Uploaders: Cédric Villemain , Dimitri Fontaine , Christoph Berg Build-Depends: debhelper-compat (= 13), postgresql-all (>= 217~) Standards-Version: 4.6.1 Rules-Requires-Root: no Vcs-Git: git://git.postgresql.org/git/pgfincore.git Vcs-Browser: 
http://git.postgresql.org/gitweb/?p=pgfincore.git Homepage: http://villemain.org/projects/pgfincore Package: postgresql-15-pgfincore Architecture: any Depends: ${shlibs:Depends}, ${misc:Depends}, postgresql-15 Description: set of PostgreSQL functions to manage blocks in memory Those functions let you know which and how many disk block from a relation are in the page cache of the operating system, and eventually write the result to a file. Then using this file, it is possible to restore the page cache state for each block of the relation. pgfincore-1.3.1/debian/control.in000066400000000000000000000017461450306203400167270ustar00rootroot00000000000000Source: pgfincore Section: database Priority: optional Maintainer: Debian PostgreSQL Maintainers Uploaders: Cédric Villemain , Dimitri Fontaine , Christoph Berg Build-Depends: debhelper-compat (= 13), postgresql-all (>= 217~) Standards-Version: 4.6.1 Rules-Requires-Root: no Vcs-Git: git://git.postgresql.org/git/pgfincore.git Vcs-Browser: http://git.postgresql.org/gitweb/?p=pgfincore.git Homepage: http://villemain.org/projects/pgfincore Package: postgresql-PGVERSION-pgfincore Architecture: any Depends: ${shlibs:Depends}, ${misc:Depends}, postgresql-PGVERSION Description: set of PostgreSQL functions to manage blocks in memory Those functions let you know which and how many disk block from a relation are in the page cache of the operating system, and eventually write the result to a file. Then using this file, it is possible to restore the page cache state for each block of the relation. 
pgfincore-1.3.1/debian/copyright000066400000000000000000000034471450306203400166520ustar00rootroot00000000000000Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ Upstream-Name: pgfincore Source: http://git.postgresql.org/gitweb/?p=pgfincore.git Files: * Copyright: 2009-2016 Cédric Villemain License: BSD-3-Clause Files: debian/* Copyright: 2013 Cédric Villemain License: BSD-3-Clause License: BSD-3-Clause Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. . THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
pgfincore-1.3.1/debian/pgversions000066400000000000000000000000041450306203400170230ustar00rootroot00000000000000all pgfincore-1.3.1/debian/rules000077500000000000000000000001431450306203400157650ustar00rootroot00000000000000#!/usr/bin/make -f override_dh_installdocs: dh_installdocs --all README.* %: dh $@ --with pgxs pgfincore-1.3.1/debian/source/000077500000000000000000000000001450306203400162075ustar00rootroot00000000000000pgfincore-1.3.1/debian/source/format000066400000000000000000000000141450306203400174150ustar00rootroot000000000000003.0 (quilt) pgfincore-1.3.1/debian/tests/000077500000000000000000000000001450306203400160515ustar00rootroot00000000000000pgfincore-1.3.1/debian/tests/control000066400000000000000000000001001450306203400174430ustar00rootroot00000000000000Depends: @, make Tests: installcheck Restrictions: allow-stderr pgfincore-1.3.1/debian/tests/installcheck000077500000000000000000000000541450306203400204420ustar00rootroot00000000000000#!/bin/sh set -e pg_buildext installcheck pgfincore-1.3.1/debian/watch000066400000000000000000000001061450306203400157350ustar00rootroot00000000000000version=4 https://github.com/klando/pgfincore/tags .*/([^/-]*).tar.gz pgfincore-1.3.1/examples/000077500000000000000000000000001450306203400153035ustar00rootroot00000000000000pgfincore-1.3.1/examples/buffercache_pgfincore.sql000066400000000000000000000020711450306203400223150ustar00rootroot00000000000000with my_table as ( select oid , relfilenode , relname from pg_class where relname = 'pgbench_accounts' ) , t as ( select generate_series(1, relpages) as g from my_table join pg_class using (relname) ) , buf as ( select relblocknumber * 2 as bn -- Pgfincore use filesystem block size , usagecount as c , isdirty as d from my_table join pg_buffercache using (relfilenode) where relforknumber = 0 ) , pgf as ( select (row_number() over (partition by c)) - 1 as bn -- pascal vs C , c , NULL as d from (select unnest( string_to_array( (pgfincore(my_table.oid, true)).databit::text, 
NULL ) ) as c from my_table ) g ) , fb as ( select pgf.bn as file_block_number , buf.c as pgcache , buf.d as pgdirty , pgf.c as oscache , pgf.d as osdirty from buf right join pgf using (bn) order by 1, 2, 3 ), res as ( select * from fb ) select row_to_json(res) -- use "res" CTE if no JSON datatype (pg < 9.2) from res; pgfincore-1.3.1/expected/000077500000000000000000000000001450306203400152665ustar00rootroot00000000000000pgfincore-1.3.1/expected/pgfincore.out000066400000000000000000000030141450306203400177710ustar00rootroot00000000000000CREATE EXTENSION pgfincore; -- -- test SYSCONF -- select from pgsysconf(); -- (1 row) select from pgsysconf_pretty(); -- (1 row) -- -- make a temp table to use below -- CREATE TEMP TABLE test AS SELECT generate_series(1,256) as a; -- -- this is not perfect testing but it is hard to predict what the OS will do -- for *sure* -- -- -- test fadvise_loader -- select from pgfadvise_loader('test', 0, true, true, B'1010'); -- (1 row) select from pgfadvise_loader('test', 0, true, false, B'1010'); -- (1 row) select from pgfadvise_loader('test', 0, false, true, B'1010'); -- (1 row) select from pgfadvise_loader('test', 0, false, false, B'1010'); -- (1 row) -- must not fail on empty databit input select from pgfadvise_loader('test', 0, false, false, B''); -- (1 row) -- ERROR on NULL databit input select from pgfadvise_loader('test', 0, false, false, NULL); ERROR: pgfadvise_loader: databit argument shouldn't be NULL CONTEXT: SQL function "pgfadvise_loader" statement 1 -- -- test pgfincore -- select from pgfincore('test', true); -- (1 row) select from pgfincore('test'); -- (1 row) -- -- test DONTNEED, WILLNEED -- select from pgfadvise_willneed('test'); -- (1 row) select from pgfadvise_dontneed('test'); -- (1 row) -- -- test PGFADVISE flags -- select from pgfadvise_sequential('test'); -- (1 row) select from pgfadvise_random('test'); -- (1 row) select from pgfadvise_normal('test'); -- (1 row) -- -- tests drawers -- select NULL || 
pgfincore_drawer(databit) from pgfincore('test','main',true); ?column? ---------- (1 row) pgfincore-1.3.1/pgfincore--1.2--1.3.1.sql000066400000000000000000000000001450306203400172360ustar00rootroot00000000000000pgfincore-1.3.1/pgfincore--1.3.1.sql000066400000000000000000000115501450306203400166770ustar00rootroot00000000000000
--
-- SYSCONF
--

-- C entry point: one row with the three sysconf(3) values.
CREATE OR REPLACE FUNCTION pgsysconf(
    OUT os_page_size   bigint,
    OUT os_pages_free  bigint,
    OUT os_total_pages bigint
)
RETURNS record
AS '$libdir/pgfincore'
LANGUAGE C;

COMMENT ON FUNCTION pgsysconf()
IS 'Get system configuration information at run time: - os_page_size is _SC_PAGESIZE - os_pages_free is _SC_AVPHYS_PAGES - os_total_pages is _SC_PHYS_PAGES man 3 sysconf for details';

-- Same values as pgsysconf(), page counts converted to sizes and
-- rendered with pg_size_pretty().
CREATE OR REPLACE FUNCTION pgsysconf_pretty(
    OUT os_page_size   text,
    OUT os_pages_free  text,
    OUT os_total_pages text
)
RETURNS record
AS ' select pg_size_pretty(os_page_size) as os_page_size, pg_size_pretty(os_pages_free * os_page_size) as os_pages_free, pg_size_pretty(os_total_pages * os_page_size) as os_total_pages from pgsysconf()'
LANGUAGE SQL;

COMMENT ON FUNCTION pgsysconf_pretty()
IS 'Pgsysconf() with human readable output';

--
-- PGFADVISE
--

-- C entry point: (relation, fork name, advice code), one row per segment.
-- The advice code must be one of the PGF_* constants of pgfincore.c
-- (10, 20, 30, 40, 50).
CREATE OR REPLACE FUNCTION pgfadvise(
    IN  regclass,
    IN  text,
    IN  int,
    OUT relpath       text,
    OUT os_page_size  bigint,
    OUT rel_os_pages  bigint,
    OUT os_pages_free bigint
)
RETURNS setof record
AS '$libdir/pgfincore'
LANGUAGE C;

COMMENT ON FUNCTION pgfadvise(regclass, text, int)
IS 'Predeclare an access pattern for file data';

-- Convenience wrappers: 'main' fork, fixed advice code.

-- 10 = PGF_WILLNEED
CREATE OR REPLACE FUNCTION pgfadvise_willneed(
    IN  regclass,
    OUT relpath       text,
    OUT os_page_size  bigint,
    OUT rel_os_pages  bigint,
    OUT os_pages_free bigint
)
RETURNS setof record
AS 'SELECT pgfadvise($1, ''main'', 10)'
LANGUAGE SQL;

-- 20 = PGF_DONTNEED
CREATE OR REPLACE FUNCTION pgfadvise_dontneed(
    IN  regclass,
    OUT relpath       text,
    OUT os_page_size  bigint,
    OUT rel_os_pages  bigint,
    OUT os_pages_free bigint
)
RETURNS setof record
AS 'SELECT pgfadvise($1, ''main'', 20)'
LANGUAGE SQL;

-- 30 = PGF_NORMAL
CREATE OR REPLACE FUNCTION pgfadvise_normal(
    IN  regclass,
    OUT relpath       text,
    OUT os_page_size  bigint,
    OUT rel_os_pages  bigint,
    OUT os_pages_free bigint
)
RETURNS setof record
AS 'SELECT pgfadvise($1, ''main'', 30)'
LANGUAGE SQL;

-- 40 = PGF_SEQUENTIAL
CREATE OR REPLACE FUNCTION pgfadvise_sequential(
    IN  regclass,
    OUT relpath       text,
    OUT os_page_size  bigint,
    OUT rel_os_pages  bigint,
    OUT os_pages_free bigint
)
RETURNS setof record
AS 'SELECT pgfadvise($1, ''main'', 40)'
LANGUAGE SQL;

-- 50 = PGF_RANDOM
CREATE OR REPLACE FUNCTION pgfadvise_random(
    IN  regclass,
    OUT relpath       text,
    OUT os_page_size  bigint,
    OUT rel_os_pages  bigint,
    OUT os_pages_free bigint
)
RETURNS setof record
AS 'SELECT pgfadvise($1, ''main'', 50)'
LANGUAGE SQL;

--
-- PGFADVISE_LOADER
--

-- C entry point: (relation, fork name, segment number, load?, unload?,
-- page bitmap).
CREATE OR REPLACE FUNCTION pgfadvise_loader(
    IN  regclass,
    IN  text,
    IN  int,
    IN  bool,
    IN  bool,
    IN  varbit,
    OUT relpath        text,
    OUT os_page_size   bigint,
    OUT os_pages_free  bigint,
    OUT pages_loaded   bigint,
    OUT pages_unloaded bigint
)
RETURNS setof record
AS '$libdir/pgfincore'
LANGUAGE C;

COMMENT ON FUNCTION pgfadvise_loader(regclass, text, int, bool, bool, varbit)
IS 'Restore cache from the snapshot, options to load/unload each block to/from cache';

-- Same as above for the 'main' fork.
CREATE OR REPLACE FUNCTION pgfadvise_loader(
    IN  regclass,
    IN  int,
    IN  bool,
    IN  bool,
    IN  varbit,
    OUT relpath        text,
    OUT os_page_size   bigint,
    OUT os_pages_free  bigint,
    OUT pages_loaded   bigint,
    OUT pages_unloaded bigint
)
RETURNS setof record
AS 'SELECT pgfadvise_loader($1, ''main'', $2, $3, $4, $5)'
LANGUAGE SQL;

--
-- PGFINCORE
--

-- C entry point: (relation, fork name, return the bitmap?), one row per
-- segment.
CREATE OR REPLACE FUNCTION pgfincore(
    IN  regclass,
    IN  text,
    IN  bool,
    OUT relpath       text,
    OUT segment       int,
    OUT os_page_size  bigint,
    OUT rel_os_pages  bigint,
    OUT pages_mem     bigint,
    OUT group_mem     bigint,
    OUT os_pages_free bigint,
    OUT databit       varbit,
    OUT pages_dirty   bigint,
    OUT group_dirty   bigint
)
RETURNS setof record
AS '$libdir/pgfincore'
LANGUAGE C;

COMMENT ON FUNCTION pgfincore(regclass, text, bool)
IS 'Utility to inspect and get a snapshot of the system cache';

-- 'main' fork, caller chooses whether the bitmap is returned.
CREATE OR REPLACE FUNCTION pgfincore(
    IN  regclass,
    IN  bool,
    OUT relpath       text,
    OUT segment       int,
    OUT os_page_size  bigint,
    OUT rel_os_pages  bigint,
    OUT pages_mem     bigint,
    OUT group_mem     bigint,
    OUT os_pages_free bigint,
    OUT databit       varbit,
    OUT pages_dirty   bigint,
    OUT group_dirty   bigint
)
RETURNS setof record
AS 'SELECT * from pgfincore($1, ''main'', $2)'
LANGUAGE SQL;

-- 'main' fork, bitmap not returned.
CREATE OR REPLACE FUNCTION pgfincore(
    IN  regclass,
    OUT relpath       text,
    OUT segment       int,
    OUT os_page_size  bigint,
    OUT rel_os_pages  bigint,
    OUT pages_mem     bigint,
    OUT group_mem     bigint,
    OUT os_pages_free bigint,
    OUT databit       varbit,
    OUT pages_dirty   bigint,
    OUT group_dirty   bigint
)
RETURNS setof record
AS 'SELECT * from pgfincore($1, ''main'', false)'
LANGUAGE SQL;

-- Renders a bitmap produced by pgfincore() as text.
CREATE OR REPLACE FUNCTION pgfincore_drawer(
    IN  varbit,
    OUT drawer cstring
)
RETURNS cstring
AS '$libdir/pgfincore'
LANGUAGE C;

COMMENT ON FUNCTION pgfincore_drawer(varbit)
IS 'A naive drawing function to visualize page cache per object';
pgfincore-1.3.1/pgfincore.c000066400000000000000000000657301450306203400156200ustar00rootroot00000000000000/*
 * PgFincore
 * This project let you see and manipulate objects in the FS page cache
 * Copyright (C) 2009-2011 Cédric Villemain
 */

/* POSIX stuff */
#define _XOPEN_SOURCE 600 /* fadvise */
#include <fcntl.h>      /* fadvise */
#include <stdlib.h>     /* exit, calloc, free */
#include <sys/stat.h>   /* stat, fstat */
#include <sys/types.h>  /* size_t, mincore */
#include <sys/mman.h>   /* mmap, mincore */
#include <unistd.h>     /* sysconf, close */

/* PostgreSQL stuff */
#include "postgres.h"           /* general Postgres declarations */

#include "access/heapam.h"      /* relation_open */
#include "catalog/catalog.h"    /* relpath */
#include "catalog/namespace.h"  /* makeRangeVarFromNameList */
#include "catalog/pg_type.h"    /* TEXTOID for tuple_desc */
#include "funcapi.h"            /* SRF */
#include "utils/builtins.h"     /* textToQualifiedNameList */
#include "utils/rel.h"          /* Relation */
#include "utils/varbit.h"       /* bitstring datatype */
#include "storage/fd.h"
#include "access/htup_details.h"    /* heap_form_tuple */
#include "common/relpath.h"     /* relpathbackend */

/* PG_MAJOR_VERSION is PG_VERSION_NUM without its two last digits */
#ifdef PG_VERSION_NUM
#define PG_MAJOR_VERSION (PG_VERSION_NUM / 100)
#else
#error "Unknown postgresql version"
#endif

#if PG_VERSION_NUM < 90300
#error "Unsupported postgresql version"
#endif

#ifdef PG_MODULE_MAGIC
PG_MODULE_MAGIC;
#endif

/* number of columns of the tuple returned by each SQL function */
#define PGSYSCONF_COLS          3
#define PGFADVISE_COLS          4
#define PGFADVISE_LOADER_COLS   5
#define PGFINCORE_COLS          10

/*
 * advice codes accepted by pgfadvise(); the SQL wrappers
 * (pgfadvise_willneed, ...) pass these literal values
 */
#define PGF_WILLNEED    10
#define PGF_DONTNEED    20
#define PGF_NORMAL      30
#define PGF_SEQUENTIAL  40
#define PGF_RANDOM      50

/* per-page flags tested in the vector filled by mincore/fincore */
#define FINCORE_PRESENT 0x1
#define FINCORE_DIRTY   0x2

/* number of bits stored per OS page in the databit bit string */
#ifndef HAVE_FINCORE
#define FINCORE_BITS    1
#else
#define FINCORE_BITS    2
#endif

/*
 * pgfadvise_fctx structure is needed
 * to keep track of relation path, segment number, ...
 */
typedef struct
{
    int             advice;         /* the posix_fadvise advice */
    TupleDesc       tupd;           /* the tuple descriptor */
    Relation        rel;            /* the relation */
    unsigned int    segcount;       /* the segment current number */
    char            *relationpath;  /* the relation path */
} pgfadvise_fctx;

/*
 * pgfadvise structure is needed
 * to return values
 */
typedef struct
{
    size_t  pageSize;   /* os page size */
    size_t  pagesFree;  /* free page cache */
    size_t  filesize;   /* the filesize */
} pgfadviseStruct;

/*
 * pgfloader structure is needed
 * to return values
 */
typedef struct
{
    size_t  pageSize;       /* os page size */
    size_t  pagesFree;      /* free page cache */
    size_t  pagesLoaded;    /* pages loaded */
    size_t  pagesUnloaded;  /* pages unloaded */
} pgfloaderStruct;

/*
 * pgfincore_fctx structure is needed
 * to keep track of relation path, segment number, ...
 */
typedef struct
{
    bool            getvector;      /* output varbit data ? */
    TupleDesc       tupd;           /* the tuple descriptor */
    Relation        rel;            /* the relation */
    unsigned int    segcount;       /* the segment current number */
    char            *relationpath;  /* the relation path */
} pgfincore_fctx;

/*
 * pgfincoreStruct structure is needed
 * to return values
*/ typedef struct { size_t pageSize; /* os page size */ size_t pagesFree; /* free page cache */ size_t rel_os_pages; size_t pages_mem; size_t group_mem; size_t pages_dirty; size_t group_dirty; VarBit *databit; } pgfincoreStruct; Datum pgsysconf(PG_FUNCTION_ARGS); Datum pgfadvise(PG_FUNCTION_ARGS); static int pgfadvise_file(char *filename, int advice, pgfadviseStruct *pgfdv); Datum pgfadvise_loader(PG_FUNCTION_ARGS); static int pgfadvise_loader_file(char *filename, bool willneed, bool dontneed, VarBit *databit, pgfloaderStruct *pgfloader); Datum pgfincore(PG_FUNCTION_ARGS); static int pgfincore_file(char *filename, pgfincoreStruct *pgfncr); Datum pgfincore_drawer(PG_FUNCTION_ARGS); #if PG_MAJOR_VERSION < 1600 #define relpathpg(rel, forkName) \ relpathbackend((rel)->rd_node, (rel)->rd_backend, (forkname_to_number(text_to_cstring(forkName)))) #else #define relpathpg(rel, forkName) \ relpathbackend((rel)->rd_locator, (rel)->rd_backend, (forkname_to_number(text_to_cstring(forkName)))) #endif /* * pgsysconf * just output the actual system value for * _SC_PAGESIZE --> Page Size * _SC_AVPHYS_PAGES --> Free page in memory * _SC_PHYS_PAGES --> Total memory * */ PG_FUNCTION_INFO_V1(pgsysconf); Datum pgsysconf(PG_FUNCTION_ARGS) { HeapTuple tuple; TupleDesc tupdesc; Datum values[PGSYSCONF_COLS]; bool nulls[PGSYSCONF_COLS]; /* initialize nulls array to build the tuple */ memset(nulls, 0, sizeof(nulls)); /* Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "pgsysconf: return type must be a row type"); /* Page size */ values[0] = Int64GetDatum(sysconf(_SC_PAGESIZE)); /* free page in memory */ values[1] = Int64GetDatum(sysconf(_SC_AVPHYS_PAGES)); /* total memory */ values[2] = Int64GetDatum(sysconf(_SC_PHYS_PAGES)); /* Build and return the result tuple. 
*/ tuple = heap_form_tuple(tupdesc, values, nulls); PG_RETURN_DATUM( HeapTupleGetDatum(tuple) ); } #if defined(USE_POSIX_FADVISE) /* * pgfadvise_file */ static int pgfadvise_file(char *filename, int advice, pgfadviseStruct *pgfdv) { /* * We use the AllocateFile(2) provided by PostgreSQL. We're going to * close it ourselves even if PostgreSQL close it anyway at transaction * end. */ FILE *fp; int fd; struct stat st; int adviceFlag; /* * OS Page size and Free pages */ pgfdv->pageSize = sysconf(_SC_PAGESIZE); /* * Fopen and fstat file * fd will be provided to posix_fadvise * if there is no file, just return 1, it is expected to leave the SRF */ fp = AllocateFile(filename, "rb"); if (fp == NULL) return 1; fd = fileno(fp); if (fstat(fd, &st) == -1) { FreeFile(fp); elog(ERROR, "pgfadvise: Can not stat object file : %s", filename); return 2; } /* * the file size is used in the SRF to output the number of pages used by * the segment */ pgfdv->filesize = st.st_size; elog(DEBUG1, "pgfadvise: working on %s of %lld bytes", filename, (long long int) pgfdv->filesize); /* FADVISE_WILLNEED */ if (advice == PGF_WILLNEED) { adviceFlag = POSIX_FADV_WILLNEED; elog(DEBUG1, "pgfadvise: setting advice POSIX_FADV_WILLNEED"); } /* FADVISE_DONTNEED */ else if (advice == PGF_DONTNEED) { adviceFlag = POSIX_FADV_DONTNEED; elog(DEBUG1, "pgfadvise: setting advice POSIX_FADV_DONTNEED"); } /* POSIX_FADV_NORMAL */ else if (advice == PGF_NORMAL) { adviceFlag = POSIX_FADV_NORMAL; elog(DEBUG1, "pgfadvise: setting advice POSIX_FADV_NORMAL"); } /* POSIX_FADV_SEQUENTIAL */ else if (advice == PGF_SEQUENTIAL) { adviceFlag = POSIX_FADV_SEQUENTIAL; elog(DEBUG1, "pgfadvise: setting advice POSIX_FADV_SEQUENTIAL"); } /* POSIX_FADV_RANDOM */ else if (advice == PGF_RANDOM) { adviceFlag = POSIX_FADV_RANDOM; elog(DEBUG1, "pgfadvise: setting advice POSIX_FADV_RANDOM"); } else { elog(ERROR, "pgfadvise: invalid advice: %d", advice); return 2; } /* * Call posix_fadvise with the relevant advice on the file descriptor */ 
posix_fadvise(fd, 0, 0, adviceFlag); /* close the file */ FreeFile(fp); /* * OS things : Pages free */ pgfdv->pagesFree = sysconf(_SC_AVPHYS_PAGES); return 0; } #else static int pgfadvise_file(char *filename, int advice, pgfadviseStruct *pgfdv) { elog(ERROR, "POSIX_FADVISE UNSUPPORTED on your platform"); return 9; } #endif /* * pgfadvise is a function that handle the process to have a sharelock * on the relation and to walk the segments. * for each segment it call the posix_fadvise with the required flag * parameter */ PG_FUNCTION_INFO_V1(pgfadvise); Datum pgfadvise(PG_FUNCTION_ARGS) { /* SRF Stuff */ FuncCallContext *funcctx; pgfadvise_fctx *fctx; /* our structure use to return values */ pgfadviseStruct *pgfdv; /* our return value, 0 for success */ int result; /* The file we are working on */ char filename[MAXPGPATH]; /* stuff done only on the first call of the function */ if (SRF_IS_FIRSTCALL()) { MemoryContext oldcontext; Oid relOid = PG_GETARG_OID(0); text *forkName = PG_GETARG_TEXT_P(1); int advice = PG_GETARG_INT32(2); /* * Postgresql stuff to return a tuple */ TupleDesc tupdesc; /* create a function context for cross-call persistence */ funcctx = SRF_FIRSTCALL_INIT(); /* * switch to memory context appropriate for multiple function calls */ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); /* allocate memory for user context */ fctx = (pgfadvise_fctx *) palloc(sizeof(pgfadvise_fctx)); /* Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "pgfadvise: return type must be a row type"); /* provide the tuple descriptor to the fonction structure */ fctx->tupd = tupdesc; /* open the current relation, accessShareLock */ // TODO use try_relation_open instead ? 
fctx->rel = relation_open(relOid, AccessShareLock); /* we get the common part of the filename of each segment of a relation */ fctx->relationpath = relpathpg(fctx->rel, forkName); /* Here we keep track of current action in all calls */ fctx->advice = advice; /* segcount is used to get the next segment of the current relation */ fctx->segcount = 0; /* And finally we keep track of our initialization */ elog(DEBUG1, "pgfadvise: init done for %s, in fork %s", fctx->relationpath, text_to_cstring(forkName)); funcctx->user_fctx = fctx; MemoryContextSwitchTo(oldcontext); } /* After the first call, we recover our context */ funcctx = SRF_PERCALL_SETUP(); fctx = funcctx->user_fctx; /* * If we are still looking the first segment * relationpath should not be suffixed */ if (fctx->segcount == 0) snprintf(filename, MAXPGPATH, "%s", fctx->relationpath); else snprintf(filename, MAXPGPATH, "%s.%u", fctx->relationpath, fctx->segcount); elog(DEBUG1, "pgfadvise: about to work with %s, current advice : %d", filename, fctx->advice); /* * Call posix_fadvise with the advice, returning the structure */ pgfdv = (pgfadviseStruct *) palloc(sizeof(pgfadviseStruct)); result = pgfadvise_file(filename, fctx->advice, pgfdv); /* * When we have work with all segments of the current relation * We exit from the SRF * Else we build and return the tuple for this segment */ if (result) { elog(DEBUG1, "pgfadvise: closing %s", fctx->relationpath); relation_close(fctx->rel, AccessShareLock); pfree(fctx); SRF_RETURN_DONE(funcctx); } else { /* * Postgresql stuff to return a tuple */ HeapTuple tuple; Datum values[PGFADVISE_COLS]; bool nulls[PGFADVISE_COLS]; /* initialize nulls array to build the tuple */ memset(nulls, 0, sizeof(nulls)); /* prepare the number of the next segment */ fctx->segcount++; /* Filename */ values[0] = CStringGetTextDatum( filename ); /* os page size */ values[1] = Int64GetDatum( (int64) pgfdv->pageSize ); /* number of pages used by segment */ values[2] = Int64GetDatum( (int64) 
((pgfdv->filesize+pgfdv->pageSize-1)/pgfdv->pageSize) ); /* free page cache */ values[3] = Int64GetDatum( (int64) pgfdv->pagesFree ); /* Build the result tuple. */ tuple = heap_form_tuple(fctx->tupd, values, nulls); /* Ok, return results, and go for next call */ SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple)); } } #if defined(USE_POSIX_FADVISE) /* * pgfadvise_file */ static int pgfadvise_loader_file(char *filename, bool willneed, bool dontneed, VarBit *databit, pgfloaderStruct *pgfloader) { bits8 *sp; int bitlen; bits8 x; int i, k; /* * We use the AllocateFile(2) provided by PostgreSQL. We're going to * close it ourselves even if PostgreSQL close it anyway at transaction * end. */ FILE *fp; int fd; struct stat st; /* * OS things : Page size */ pgfloader->pageSize = sysconf(_SC_PAGESIZE); /* * we count the action we perform * both are theorical : we don't know if the page was or not in memory * when we call posix_fadvise */ pgfloader->pagesLoaded = 0; pgfloader->pagesUnloaded = 0; /* * Fopen and fstat file * fd will be provided to posix_fadvise * if there is no file, just return 1, it is expected to leave the SRF */ fp = AllocateFile(filename, "rb"); if (fp == NULL) return 1; fd = fileno(fp); if (fstat(fd, &st) == -1) { FreeFile(fp); elog(ERROR, "pgfadvise_loader: Can not stat object file: %s", filename); return 2; } elog(DEBUG1, "pgfadvise_loader: working on %s", filename); bitlen = VARBITLEN(databit); sp = VARBITS(databit); for (i = 0; i < bitlen - BITS_PER_BYTE; i += BITS_PER_BYTE, sp++) { x = *sp; /* Is this bit set ? 
*/ for (k = 0; k < BITS_PER_BYTE; k++) { if (IS_HIGHBIT_SET(x)) { if (willneed) { (void) posix_fadvise(fd, ((i+k) * pgfloader->pageSize), pgfloader->pageSize, POSIX_FADV_WILLNEED); pgfloader->pagesLoaded++; } } else if (dontneed) { (void) posix_fadvise(fd, ((i+k) * pgfloader->pageSize), pgfloader->pageSize, POSIX_FADV_DONTNEED); pgfloader->pagesUnloaded++; } x <<= 1; } } /* * XXX this copy/paste of code to finnish to walk the bits is not pretty */ if (i < bitlen) { /* print the last partial byte */ x = *sp; for (k = i; k < bitlen; k++) { if (IS_HIGHBIT_SET(x)) { if (willneed) { (void) posix_fadvise(fd, (k * pgfloader->pageSize), pgfloader->pageSize, POSIX_FADV_WILLNEED); pgfloader->pagesLoaded++; } } else if (dontneed) { (void) posix_fadvise(fd, (k * pgfloader->pageSize), pgfloader->pageSize, POSIX_FADV_DONTNEED); pgfloader->pagesUnloaded++; } x <<= 1; } } FreeFile(fp); /* * OS things : Pages free */ pgfloader->pagesFree = sysconf(_SC_AVPHYS_PAGES); return 0; } #else static int pgfadvise_loader_file(char *filename, bool willneed, bool dontneed, VarBit *databit, pgfloaderStruct *pgfloader) { elog(ERROR, "POSIX_FADVISE UNSUPPORTED on your platform"); return 9; } #endif /* * * pgfadv_loader to handle work with varbit map of buffer cache. 
* it is actually used for loading/unloading block to/from buffer cache * */ PG_FUNCTION_INFO_V1(pgfadvise_loader); Datum pgfadvise_loader(PG_FUNCTION_ARGS) { Oid relOid = PG_GETARG_OID(0); text *forkName = PG_GETARG_TEXT_P(1); int segmentNumber = PG_GETARG_INT32(2); bool willneed = PG_GETARG_BOOL(3); bool dontneed = PG_GETARG_BOOL(4); VarBit *databit; /* our structure use to return values */ pgfloaderStruct *pgfloader; Relation rel; char *relationpath; char filename[MAXPGPATH]; /* our return value, 0 for success */ int result; /* * Postgresql stuff to return a tuple */ HeapTuple tuple; TupleDesc tupdesc; Datum values[PGFADVISE_LOADER_COLS]; bool nulls[PGFADVISE_LOADER_COLS]; if (PG_ARGISNULL(5)) elog(ERROR, "pgfadvise_loader: databit argument shouldn't be NULL"); databit = PG_GETARG_VARBIT_P(5); /* initialize nulls array to build the tuple */ memset(nulls, 0, sizeof(nulls)); /* Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); /* open the current relation in accessShareLock */ rel = relation_open(relOid, AccessShareLock); /* we get the common part of the filename of each segment of a relation */ relationpath = relpathpg(rel, forkName); /* * If we are looking the first segment, * relationpath should not be suffixed */ if (segmentNumber == 0) snprintf(filename, MAXPGPATH, "%s", relationpath); else snprintf(filename, MAXPGPATH, "%s.%u", relationpath, (int) segmentNumber); /* * We don't need the relation anymore * the only purpose was to get a consistent filename * (if file disappear, an error is logged) */ relation_close(rel, AccessShareLock); /* * Call pgfadvise_loader with the varbit */ pgfloader = (pgfloaderStruct *) palloc(sizeof(pgfloaderStruct)); result = pgfadvise_loader_file(filename, willneed, dontneed, databit, pgfloader); if (result != 0) elog(ERROR, "Can't read file %s, fork(%s)", filename, text_to_cstring(forkName)); /* Filename */ 
values[0] = CStringGetTextDatum( filename ); /* os page size */ values[1] = Int64GetDatum( pgfloader->pageSize ); /* free page cache */ values[2] = Int64GetDatum( pgfloader->pagesFree ); /* pages loaded */ values[3] = Int64GetDatum( pgfloader->pagesLoaded ); /* pages unloaded */ values[4] = Int64GetDatum( pgfloader->pagesUnloaded ); /* Build and return the result tuple. */ tuple = heap_form_tuple(tupdesc, values, nulls); PG_RETURN_DATUM( HeapTupleGetDatum(tuple) ); } /* * pgfincore_file handle the mmaping, mincore process (and access file, etc.) */ static int pgfincore_file(char *filename, pgfincoreStruct *pgfncr) { int flag=1; int flag_dirty=1; int len, bitlen; bits8 *r; bits8 x = 0; register int64 pageIndex; /* * We use the AllocateFile(2) provided by PostgreSQL. We're going to * close it ourselves even if PostgreSQL close it anyway at transaction * end. */ FILE *fp; int fd; struct stat st; #ifndef HAVE_FINCORE void *pa = (char *) 0; #endif unsigned char *vec = (unsigned char *) 0; /* * OS Page size */ pgfncr->pageSize = sysconf(_SC_PAGESIZE); /* * Initialize counters */ pgfncr->pages_mem = 0; pgfncr->group_mem = 0; pgfncr->pages_dirty = 0; pgfncr->group_dirty = 0; pgfncr->rel_os_pages = 0; /* * Fopen and fstat file * fd will be provided to posix_fadvise * if there is no file, just return 1, it is expected to leave the SRF */ fp = AllocateFile(filename, "rb"); if (fp == NULL) return 1; fd = fileno(fp); if (fstat(fd, &st) == -1) { FreeFile(fp); elog(ERROR, "Can not stat object file : %s", filename); return 2; } /* * if file ok * then process */ if (st.st_size != 0) { /* number of pages in the current file */ pgfncr->rel_os_pages = (st.st_size+pgfncr->pageSize-1)/pgfncr->pageSize; #ifndef HAVE_FINCORE pa = mmap(NULL, st.st_size, PROT_NONE, MAP_SHARED, fd, 0); if (pa == MAP_FAILED) { int save_errno = errno; FreeFile(fp); elog(ERROR, "Can not mmap object file : %s, errno = %i,%s\nThis error can happen if there is not enought space in memory to do the projection. 
Please mail cedric@villemain.org with '[pgfincore] ENOMEM' as subject.", filename, save_errno, strerror(save_errno)); return 3; } #endif /* Prepare our vector containing all blocks information */ vec = calloc(1, (st.st_size+pgfncr->pageSize-1)/pgfncr->pageSize); if ((void *)0 == vec) { #ifndef HAVE_FINCORE munmap(pa, st.st_size); #endif FreeFile(fp); elog(ERROR, "Can not calloc object file : %s", filename); return 4; } #ifndef HAVE_FINCORE /* Affect vec with mincore */ if (mincore(pa, st.st_size, vec) != 0) { int save_errno = errno; munmap(pa, st.st_size); elog(ERROR, "mincore(%p, %lld, %p): %s\n", pa, (long long int)st.st_size, vec, strerror(save_errno)); #else /* Affect vec with fincore */ if (fincore(fd, 0, st.st_size, vec) != 0) { int save_errno = errno; elog(ERROR, "fincore(%u, 0, %lld, %p): %s\n", fd, (long long int)st.st_size, vec, strerror(save_errno)); #endif free(vec); FreeFile(fp); return 5; } /* * prepare the bit string */ bitlen = FINCORE_BITS * ((st.st_size+pgfncr->pageSize-1)/pgfncr->pageSize); len = VARBITTOTALLEN(bitlen); /* * set to 0 so that *r is always initialised and string is zero-padded * XXX: do we need to free that ? 
*/ pgfncr->databit = (VarBit *) palloc0(len); SET_VARSIZE(pgfncr->databit, len); VARBITLEN(pgfncr->databit) = bitlen; r = VARBITS(pgfncr->databit); x = HIGHBIT; /* handle the results */ for (pageIndex = 0; pageIndex <= pgfncr->rel_os_pages; pageIndex++) { // block in memory if (vec[pageIndex] & FINCORE_PRESENT) { pgfncr->pages_mem++; *r |= x; if (FINCORE_BITS > 1) { if (vec[pageIndex] & FINCORE_DIRTY) { pgfncr->pages_dirty++; *r |= (x >> 1); /* we flag to detect contigous blocks in the same state */ if (flag_dirty) pgfncr->group_dirty++; flag_dirty = 0; } else flag_dirty = 1; } elog (DEBUG5, "in memory blocks : %lld / %lld", (long long int) pageIndex, (long long int) pgfncr->rel_os_pages); /* we flag to detect contigous blocks in the same state */ if (flag) pgfncr->group_mem++; flag = 0; } else flag=1; x >>= FINCORE_BITS; if (x == 0) { x = HIGHBIT; r++; } } } elog(DEBUG1, "pgfincore %s: %lld of %lld block in linux cache, %lld groups", filename, (long long int) pgfncr->pages_mem, (long long int) pgfncr->rel_os_pages, (long long int) pgfncr->group_mem); /* * free and close */ free(vec); #ifndef HAVE_FINCORE munmap(pa, st.st_size); #endif FreeFile(fp); /* * OS things : Pages free */ pgfncr->pagesFree = sysconf(_SC_AVPHYS_PAGES); return 0; } /* * pgfincore is a function that handle the process to have a sharelock * on the relation and to walk the segments. 
* for each segment it call the appropriate function depending on 'action' * parameter */ PG_FUNCTION_INFO_V1(pgfincore); Datum pgfincore(PG_FUNCTION_ARGS) { /* SRF Stuff */ FuncCallContext *funcctx; pgfincore_fctx *fctx; /* our structure use to return values */ pgfincoreStruct *pgfncr; /* our return value, 0 for success */ int result; /* The file we are working on */ char filename[MAXPGPATH]; /* stuff done only on the first call of the function */ if (SRF_IS_FIRSTCALL()) { MemoryContext oldcontext; Oid relOid = PG_GETARG_OID(0); text *forkName = PG_GETARG_TEXT_P(1); bool getvector = PG_GETARG_BOOL(2); /* * Postgresql stuff to return a tuple */ TupleDesc tupdesc; /* create a function context for cross-call persistence */ funcctx = SRF_FIRSTCALL_INIT(); /* * switch to memory context appropriate for multiple function calls */ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); /* allocate memory for user context */ fctx = (pgfincore_fctx *) palloc(sizeof(pgfincore_fctx)); /* Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "pgfadvise: return type must be a row type"); /* provide the tuple descriptor to the fonction structure */ fctx->tupd = tupdesc; /* are we going to grab and output the varbit data (can be large) */ fctx->getvector = getvector; /* open the current relation, accessShareLock */ // TODO use try_relation_open instead ? 
fctx->rel = relation_open(relOid, AccessShareLock); /* we get the common part of the filename of each segment of a relation */ fctx->relationpath = relpathpg(fctx->rel, forkName); /* segcount is used to get the next segment of the current relation */ fctx->segcount = 0; /* And finally we keep track of our initialization */ elog(DEBUG1, "pgfincore: init done for %s, in fork %s", fctx->relationpath, text_to_cstring(forkName)); funcctx->user_fctx = fctx; MemoryContextSwitchTo(oldcontext); } /* After the first call, we recover our context */ funcctx = SRF_PERCALL_SETUP(); fctx = funcctx->user_fctx; /* * If we are still looking the first segment * relationpath should not be suffixed */ if (fctx->segcount == 0) snprintf(filename, MAXPGPATH, "%s", fctx->relationpath); else snprintf(filename, MAXPGPATH, "%s.%u", fctx->relationpath, fctx->segcount); elog(DEBUG1, "pgfincore: about to work with %s", filename); /* * Call pgfincore with the advice, returning the structure */ pgfncr = (pgfincoreStruct *) palloc(sizeof(pgfincoreStruct)); result = pgfincore_file(filename, pgfncr); /* * When we have work with all segment of the current relation, test success * We exit from the SRF */ if (result) { elog(DEBUG1, "pgfincore: closing %s", fctx->relationpath); relation_close(fctx->rel, AccessShareLock); pfree(fctx); SRF_RETURN_DONE(funcctx); } else { /* * Postgresql stuff to return a tuple */ HeapTuple tuple; Datum values[PGFINCORE_COLS]; bool nulls[PGFINCORE_COLS]; /* initialize nulls array to build the tuple */ memset(nulls, 0, sizeof(nulls)); /* Filename */ values[0] = CStringGetTextDatum(filename); /* Segment Number */ values[1] = Int32GetDatum(fctx->segcount); /* os page size */ values[2] = Int64GetDatum(pgfncr->pageSize); /* number of pages used by segment */ values[3] = Int64GetDatum(pgfncr->rel_os_pages); /* number of pages in OS cache */ values[4] = Int64GetDatum(pgfncr->pages_mem); /* number of group of contigous page in os cache */ values[5] = 
Int64GetDatum(pgfncr->group_mem); /* free page cache */ values[6] = Int64GetDatum(pgfncr->pagesFree); /* the map of the file with bit set for in os cache page */ if (fctx->getvector && pgfncr->rel_os_pages) { values[7] = VarBitPGetDatum(pgfncr->databit); } else { nulls[7] = true; values[7] = (Datum) NULL; } /* number of pages dirty in OS cache */ values[8] = Int64GetDatum(pgfncr->pages_dirty); /* number of group of contigous dirty pages in os cache */ values[9] = Int64GetDatum(pgfncr->group_dirty); /* Build the result tuple. */ tuple = heap_form_tuple(fctx->tupd, values, nulls); /* prepare the number of the next segment */ fctx->segcount++; /* Ok, return results, and go for next call */ SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple)); } } /* * pgfincore_drawer A very naive renderer. (for testing) */ PG_FUNCTION_INFO_V1(pgfincore_drawer); Datum pgfincore_drawer(PG_FUNCTION_ARGS) { char *result, *r; int len,i,k; VarBit *databit; bits8 *sp; bits8 x; if (PG_ARGISNULL(0)) elog(ERROR, "pgfincore_drawer: databit argument shouldn't be NULL"); databit = PG_GETARG_VARBIT_P(0); len = VARBITLEN(databit); result = (char *) palloc((len/FINCORE_BITS) + 1); sp = VARBITS(databit); r = result; for (i = 0; i <= len - BITS_PER_BYTE; i += BITS_PER_BYTE, sp++) { x = *sp; /* Is this bit set ? */ for (k = 0; k < (BITS_PER_BYTE/FINCORE_BITS); k++) { char out = ' '; if (IS_HIGHBIT_SET(x)) out = '.' ; x <<= 1; if (FINCORE_BITS > 1) { if (IS_HIGHBIT_SET(x)) out = '*'; x <<= 1; } *r++ = out; } } if (i < len) { /* print the last partial byte */ x = *sp; for (k = i; k < (len/FINCORE_BITS); k++) { char out = ' '; if (IS_HIGHBIT_SET(x)) out = '.' 
; x <<= 1; if (FINCORE_BITS > 1) { if (IS_HIGHBIT_SET(x)) out = '*'; x <<= 1; } *r++ = out; } } *r = '\0'; PG_RETURN_CSTRING(result); } pgfincore-1.3.1/pgfincore.control000066400000000000000000000002621450306203400170430ustar00rootroot00000000000000# pgfincore extension comment = 'examine and manage the os buffer cache' default_version = '1.3.1' module_pathname = '$libdir/pgfincore' directory = pgfincore relocatable = true pgfincore-1.3.1/sql/000077500000000000000000000000001450306203400142645ustar00rootroot00000000000000pgfincore-1.3.1/sql/pgfincore.sql000066400000000000000000000023301450306203400167570ustar00rootroot00000000000000CREATE EXTENSION pgfincore; -- -- test SYSCONF -- select from pgsysconf(); select from pgsysconf_pretty(); -- -- make a temp table to use below -- CREATE TEMP TABLE test AS SELECT generate_series(1,256) as a; -- -- this is not perfect testing but it is hard to predict what the OS will do -- for *sure* -- -- -- test fadvise_loader -- select from pgfadvise_loader('test', 0, true, true, B'1010'); select from pgfadvise_loader('test', 0, true, false, B'1010'); select from pgfadvise_loader('test', 0, false, true, B'1010'); select from pgfadvise_loader('test', 0, false, false, B'1010'); -- must not fail on empty databit input select from pgfadvise_loader('test', 0, false, false, B''); -- ERROR on NULL databit input select from pgfadvise_loader('test', 0, false, false, NULL); -- -- test pgfincore -- select from pgfincore('test', true); select from pgfincore('test'); -- -- test DONTNEED, WILLNEED -- select from pgfadvise_willneed('test'); select from pgfadvise_dontneed('test'); -- -- test PGFADVISE flags -- select from pgfadvise_sequential('test'); select from pgfadvise_random('test'); select from pgfadvise_normal('test'); -- -- tests drawers -- select NULL || pgfincore_drawer(databit) from pgfincore('test','main',true);