pax_global_header00006660000000000000000000000064122516232500014510gustar00rootroot0000000000000052 comment=d65b8664039351fe5bc6468a395d8a505a728264 pgfincore-1.1.2/000077500000000000000000000000001225162325000134655ustar00rootroot00000000000000pgfincore-1.1.2/.gitignore000066400000000000000000000001461225162325000154560ustar00rootroot00000000000000.pc debian/control debian/files build-pgfincore-* debian/postgresql-* results/ *.so pgfincore*.tar.gz pgfincore-1.1.2/AUTHORS000066400000000000000000000005251225162325000145370ustar00rootroot00000000000000pgfincore is written by: * Cédric Villemain I take pg_relation_size code as a model, I look at the C interesting part from fincore (http://net.doit.wisc.edu/~plonka/fincore/), and I follow the great idea from http://www.kennygorman.com/wordpress/?p=246. In short, thank you Kenny Gorman, thank you Dave Plonka ! pgfincore-1.1.2/COPYRIGHT000066400000000000000000000030501225162325000147560ustar00rootroot00000000000000/* * Copyright (c) 2009-2013 Cédric Villemain * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the author nor the names of any co-contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY CONTRIBUTORS ``AS IS'' AND ANY EXPRESS * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. 
IN NO EVENT SHALL CONTRIBUTORS BE LIABLE FOR ANY * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ pgfincore-1.1.2/ChangeLog000066400000000000000000000067041225162325000152460ustar00rootroot0000000000000012/10/2013 Cédric Villemain * 1.1.2 - Fix README filename for PGXS - Update to PostgreSQL 9.3 - Fix faillure on NULL input (pgfadvise_loader) - Several fixes and layout changes 06/21/2012 Cédric Villemain * 1.1.2 - Change the open() call to use AllocateFile, and FreeFile 12/06/2011 Cédric Villemain * 1.1.1 - Fix Makefile again, as well as debian scripts (VPATH) - Add checks (make installcheck) - Improve .gitignore - Add a debian/watch file tracking pgfoundry release - Add regression files to VPATH build 09/07/2011 Cédric Villemain * 1.1.0 - Fix Makefile and remove the dir sql/ (useless and error prone) - Fix the printf of int64 by casting to long long int (i386 and adm64 behave differently with int64) - Updated to work with PostgreSQL 8.3 (TAKATSUKA Haruka) - Improve debian packaging (Dimitri Fontaine) - Add support for *BSD kernels - Remove mention of PGXS in the README 07/28/2011 Cédric Villemain * 1.0.0 - Output varbit containing vector information with pgfincore*() - Add Debian packaging (Dimitri Fontaine) - Update to work with PostgreSQL >= 9.1 (Jeff Janes) - Add total number of pages of memory with pgsysconf() - Add function pgsysconf_pretty() - Major rewrite of the functions - pgfadvise*() to handle simple posix_fadvise call - pgfadvise_loader() to restore file status (pages in/out cache) - pgfincore*() to handle mincore usage - pgsysconf*() to handle 
sysconf information - Use get_call_result_type() to build the tuple descriptor (suggested by RhodiumToad on IRC) - Remove limitation of usage on temp tables - Improve 9.1 installation (Extension) 04/30/2010 Cédric Villemain * 0.4.1 - use AllocateFile instead of fopen - call PG_GETARG* earlier - remove useless global counter - add error handler in pgfadv_snapshot() - errno to catch the last segment - improve Readme - some minor fix and beautify 01/05/2010 Cédric Villemain * 0.4.0 - fix test is not temp table - add posix_fadvise_willneed flag - add posix_fadvise_dontneed flag - add posix_fadvise_normal flag - add posix_fadvise_sequential flag - add posix_fadvise_random flag - rewrite main SRF - improve output (more informations) - fix copyright - add pgsysconf() - add pgmincore_snapshot to write mincore state in a file - add pgfadv_willneed_snapshot to read mincore state from file 10/26/2009 Cédric Villemain * 0.3.2 - fix fctx init 10/26/2009 Cédric Villemain * 0.3.1 - fix Makefile without PGXS - fix install doc in README 08/12/2009 Cédric Villemain * 0.3 - pgfincore now return a set of record - relname, relpath, block_disk, block_mem, group_mem - this version can only be build againt a postgresql > 8.3 /!\ 08/10/2009 Cédric Villemain * 0.2.1 - fix munmap call error 08/08/2009 Cédric Villemain * 0.2 - add support for 8.4 - fix mmap error when file is empty 06/29/2009 Cédric Villemain * 0.1.1 - cleaning and fixing 06/27/2009 Cédric Villemain * 0.1 - functions are working, basicaly. 
pgfincore-1.1.2/Makefile000066400000000000000000000060261225162325000151310ustar00rootroot00000000000000EXTENSION = pgfincore EXTVERSION = 1.1.2 EXTCOMMENT = examine and manage the os buffer cache MODULES = $(EXTENSION) MODULEDIR = $(EXTENSION) DOCS = README.rst DATA_built = $(EXTENSION)--$(EXTVERSION).sql $(EXTENSION)--unpackaged--$(EXTVERSION).sql REGRESS = $(EXTENSION).ext EXTRA_CLEAN = $(EXTENSION).control PG_CONFIG = pg_config BUILD_EXTENSION = $(shell $(PG_CONFIG) --version | grep -qE "8\.|9\.0" && echo no || echo yes) ifeq ($(BUILD_EXTENSION),no) DATA_built = DATA = $(EXTENSION).sql uninstall_$(EXTENSION).sql REGRESS = $(EXTENSION) EXTRA_CLEAN = endif PGXS := $(shell $(PG_CONFIG) --pgxs) include $(PGXS) # Build some more files for extension support: ifeq ($(BUILD_EXTENSION),yes) # pgxs is included after variable definition and before targets, so the # PostgreSQL default target is used (all:) # build the extension--unpackaged--version.sql from uninstall_extension.sql # this assumes that the extension was installed via sql script instead of # CREATE EXTENSION. # This won't upgrade from a previous version to the current one. 
$(EXTENSION)--unpackaged--$(EXTVERSION).sql: uninstall_$(EXTENSION).sql sed 's/DROP /ALTER EXTENSION $(EXTENSION) ADD /' $< > $@ # this copy the extension.sql to extension--version.sql $(EXTENSION)--$(EXTVERSION).sql: $(EXTENSION).sql cp $< $@ # this build extension.control from extension.control.in $(EXTENSION).control: $(EXTENSION).control.in sed 's/EXTVERSION/$(EXTVERSION)/;s/EXTENSION/$(EXTENSION)/;s/EXTCOMMENT/$(EXTCOMMENT)/' $< > $@ endif # Here we override targets # Recent PostgreSQL got a bugfix about that, here we just abuse the upstream fix in the mean-time # FIX HERE before PostgreSQL got the backpatch and push the latest minor, can remove this part when done ifeq ($(BUILD_EXTENSION),yes) install: all installcontrol installdata installdocs installscripts | installdirs ifdef MODULES $(INSTALL_SHLIB) $(addsuffix $(DLSUFFIX), $(MODULES)) '$(DESTDIR)$(pkglibdir)/' endif # MODULES installcontrol: $(addsuffix .control, $(EXTENSION)) ifneq (,$(EXTENSION)) $(INSTALL_DATA) $^ '$(DESTDIR)$(datadir)/extension/' endif installdata: $(DATA) $(DATA_built) ifneq (,$(DATA)$(DATA_built)) $(INSTALL_DATA) $^ '$(DESTDIR)$(datadir)/$(datamoduledir)/' endif installdocs: $(DOCS) ifdef DOCS ifdef docdir $(INSTALL_DATA) $^ '$(DESTDIR)$(docdir)/$(docmoduledir)/' endif # docdir endif # DOCS installscripts: $(SCRIPTS) $(SCRIPTS_built) ifdef SCRIPTS $(INSTALL_SCRIPT) $^ '$(DESTDIR)$(bindir)/' endif # SCRIPTS installdirs: ifneq (,$(EXTENSION)) $(MKDIR_P) '$(DESTDIR)$(datadir)/extension' endif ifneq (,$(DATA)$(DATA_built)) $(MKDIR_P) '$(DESTDIR)$(datadir)/$(datamoduledir)' endif ifneq (,$(MODULES)) $(MKDIR_P) '$(DESTDIR)$(pkglibdir)' endif ifdef DOCS ifdef docdir $(MKDIR_P) '$(DESTDIR)$(docdir)/$(docmoduledir)' endif # docdir endif # DOCS endif dist: git archive --prefix=$(EXTENSION)-$(EXTVERSION)/ -o ../$(EXTENSION)_$(EXTVERSION).orig.tar.gz HEAD deb: make clean make -f debian/rules debian/control dh clean make -f debian/rules orig debuild -us -uc -sa 
pgfincore-1.1.2/README.rst000066400000000000000000000330551225162325000151620ustar00rootroot00000000000000=========== PgFincore =========== -------------------------------------------------------------- A set of functions to manage pages in memory from PostgreSQL -------------------------------------------------------------- A set of functions to handle low-level management of relations using mincore to explore cache memory. DESCRIPTION =========== With PostgreSQL, each Table or Index is splitted in segments of (usually) 1GB, and each segment is splitted in pages in memory then in blocks for the filesystem. Those functions let you know which and how many disk block from a relation are in the page cache of the operating system. It can provide the result as a VarBit and can be stored in a table. Then using this table, it is possible to restore the page cache state for each block of the relation, even in another server, thanks to Streaming Replication. Other functions are used to set a *POSIX_FADVISE* flag on the entire relation (each segment). The more usefull are probably *WILLNEED* and *DONTNEED* which push and pop blocks of each segments of a relation from page cache, respectively. Each functions are call with at least a table name or an index name (or oid) as a parameter and walk each segment of the relation. 
DOWNLOAD ======== You can grab the latest code with git:: git clone git://git.postgresql.org/git/pgfincore.git or git://github.com/klando/pgfincore.git And the project is on pgfoundry : http://pgfoundry.org/projects/pgfincore INSTALL ======= From source code:: make clean make su make install For PostgreSQL >= 9.1, log in your database and:: mydb=# CREATE EXTENSION pgfincore; For other release, create the functions from the sql script (it should be in your contrib directory):: psql mydb -f pgfincore.sql PgFincore is also shipped with Debian scripts to build your own package:: aptitude install debhelper postgresql-server-dev-all postgresql-server-dev-9.1 # or postgresql-server-dev-8.4|postgresql-server-dev-9.0 make deb dpkg -i ../postgresql-9.1-pgfincore_1.1.1-1_amd64.deb PgFincore is packaged for *RPM* at http://yum.postgresql.org/ PgFincore is packaged for *debian* at http://pgapt.debian.net/ EXAMPLES ======== Here are some examples of usage. If you want more details go to Documentation_ Get current state of a relation ------------------------------- May be useful:: cedric=# select * from pgfincore('pgbench_accounts'); relpath | segment | os_page_size | rel_os_pages | pages_mem | group_mem | os_pages_free | databit --------------------+---------+--------------+--------------+-----------+-----------+---------------+--------- base/11874/16447 | 0 | 4096 | 262144 | 262144 | 1 | 81016 | base/11874/16447.1 | 1 | 4096 | 65726 | 65726 | 1 | 81016 | (2 rows) Time: 31.563 ms Load a table or an index in OS Page Buffer ------------------------------------------ You may want to try to keep a table or an index into the OS Page Cache, or preload a table before your well know big query is executed (reducing the query time). 
To do so, just execute the following query:: cedric=# select * from pgfadvise_willneed('pgbench_accounts'); relpath | os_page_size | rel_os_pages | os_pages_free --------------------+--------------+--------------+--------------- base/11874/16447 | 4096 | 262144 | 169138 base/11874/16447.1 | 4096 | 65726 | 103352 (2 rows) Time: 4462,936 ms * The column *os_page_size* report that page size is 4KB. * The column *rel_os_pages* is the number of pages of the specified file. * The column *os_pages_free* is the number of free pages in memory (for caching). Snapshot and Restore the OS Page Buffer state of a table or an index (or more) ------------------------------------------------------------------------------ You may want to restore a table or an index into the OS Page Cache as it was while you did the snapshot. For example if you have to reboot your server, then when PostgreSQL start up the first queries might be slower because neither PostgreSQL or the OS have pages in their respective cache about the relations involved in those first queries. 
Executing a snapshot and a restore is very simple:: -- Snapshot cedric=# create table pgfincore_snapshot as cedric-# select 'pgbench_accounts'::text as relname,*,now() as date_snapshot cedric-# from pgfincore('pgbench_accounts',true); -- Restore cedric=# select * from pgfadvise_loader('pgbench_accounts', 0, true, true, (select databit from pgfincore_snapshot where relname='pgbench_accounts' and segment = 0)); relpath | os_page_size | os_pages_free | pages_loaded | pages_unloaded ------------------+--------------+---------------+--------------+---------------- base/11874/16447 | 4096 | 80867 | 262144 | 0 (1 row) Time: 35.349 ms * The column *pages_loaded* report how many pages have been read to memory (they may have already been in memoy) * The column *pages_unloaded* report how many pages have been removed from memory (they may not have already been in memoy); SYNOPSIS ======== :: pgsysconf(OUT os_page_size bigint, OUT os_pages_free bigint, OUT os_total_pages bigint) RETURNS record pgsysconf_pretty(OUT os_page_size text, OUT os_pages_free text, OUT os_total_pages text) RETURNS record pgfadvise(IN relname regclass, IN fork text, IN action int, OUT relpath text, OUT os_page_size bigint, OUT rel_os_pages bigint, OUT os_pages_free bigint) RETURNS setof record pgfadvise_willneed(IN relname regclass, OUT relpath text, OUT os_page_size bigint, OUT rel_os_pages bigint, OUT os_pages_free bigint) RETURNS setof record pgfadvise_dontneed(IN relname regclass, OUT relpath text, OUT os_page_size bigint, OUT rel_os_pages bigint, OUT os_pages_free bigint) RETURNS setof record pgfadvise_normal(IN relname regclass, OUT relpath text, OUT os_page_size bigint, OUT rel_os_pages bigint, OUT os_pages_free bigint) RETURNS setof record pgfadvise_sequential(IN relname regclass, OUT relpath text, OUT os_page_size bigint, OUT rel_os_pages bigint, OUT os_pages_free bigint) RETURNS setof record pgfadvise_random(IN relname regclass, OUT relpath text, OUT os_page_size bigint, OUT rel_os_pages 
bigint, OUT os_pages_free bigint) RETURNS setof record pgfadvise_loader(IN relname regclass, IN fork text, IN segment int, IN load bool, IN unload bool, IN databit varbit, OUT relpath text, OUT os_page_size bigint, OUT os_pages_free bigint, OUT pages_loaded bigint, OUT pages_unloaded bigint) RETURNS setof record pgfadvise_loader(IN relname regclass, IN segment int, IN load bool, IN unload bool, IN databit varbit, OUT relpath text, OUT os_page_size bigint, OUT os_pages_free bigint, OUT pages_loaded bigint, OUT pages_unloaded bigint) RETURNS setof record pgfincore(IN relname regclass, IN fork text, IN getdatabit bool, OUT relpath text, OUT segment int, OUT os_page_size bigint, OUT rel_os_pages bigint, OUT pages_mem bigint, OUT group_mem bigint, OUT os_pages_free bigint, OUT databit varbit) RETURNS setof record pgfincore(IN relname regclass, IN getdatabit bool, OUT relpath text, OUT segment int, OUT os_page_size bigint, OUT rel_os_pages bigint, OUT pages_mem bigint, OUT group_mem bigint, OUT os_pages_free bigint, OUT databit varbit) RETURNS setof record pgfincore(IN relname regclass, OUT relpath text, OUT segment int, OUT os_page_size bigint, OUT rel_os_pages bigint, OUT pages_mem bigint, OUT group_mem bigint, OUT os_pages_free bigint, OUT databit varbit) RETURNS setof record DOCUMENTATION ============= pgsysconf --------- This function output size of OS blocks, number of free page in the OS Page Buffer. :: cedric=# select * from pgsysconf(); os_page_size | os_pages_free | os_total_pages --------------+---------------+---------------- 4096 | 80431 | 4094174 pgsysconf_pretty ---------------- The same as above, but with pretty output. :: cedric=# select * from pgsysconf_pretty(); os_page_size | os_pages_free | os_total_pages --------------+---------------+---------------- 4096 bytes | 314 MB | 16 GB pgfadvise_WILLNEED ------------------ This function set *WILLNEED* flag on the current relation. 
It means that the Operating Sytem will try to load as much pages as possible of the relation. Main idea is to preload files on server startup, perhaps using cache hit/miss ratio or most required relations/indexes. :: cedric=# select * from pgfadvise_willneed('pgbench_accounts'); relpath | os_page_size | rel_os_pages | os_pages_free --------------------+--------------+--------------+--------------- base/11874/16447 | 4096 | 262144 | 80650 base/11874/16447.1 | 4096 | 65726 | 80650 pgfadvise_DONTNEED ------------------ This function set *DONTNEED* flag on the current relation. It means that the Operating System will first unload pages of the file if it need to free some memory. Main idea is to unload files when they are not usefull anymore (instead of perhaps more interesting pages) :: cedric=# select * from pgfadvise_dontneed('pgbench_accounts'); relpath | os_page_size | rel_os_pages | os_pages_free --------------------+--------------+--------------+--------------- base/11874/16447 | 4096 | 262144 | 342071 base/11874/16447.1 | 4096 | 65726 | 408103 pgfadvise_NORMAL ---------------- This function set *NORMAL* flag on the current relation. pgfadvise_SEQUENTIAL -------------------- This function set *SEQUENTIAL* flag on the current relation. pgfadvise_RANDOM ---------------- This function set *RANDOM* flag on the current relation. pgfadvise_loader ---------------- This function allow to interact directly with the Page Cache. It can be used to load and/or unload page from memory based on a varbit representing the map of the pages to load/unload accordingly. 
Work with relation pgbench_accounts, segment 0, arbitrary varbit map:: -- Loading and Unloading cedric=# select * from pgfadvise_loader('pgbench_accounts', 0, true, true, B'111000'); relpath | os_page_size | os_pages_free | pages_loaded | pages_unloaded ------------------+--------------+---------------+--------------+---------------- base/11874/16447 | 4096 | 408376 | 3 | 3 -- Loading cedric=# select * from pgfadvise_loader('pgbench_accounts', 0, true, false, B'111000'); relpath | os_page_size | os_pages_free | pages_loaded | pages_unloaded ------------------+--------------+---------------+--------------+---------------- base/11874/16447 | 4096 | 408370 | 3 | 0 -- Unloading cedric=# select * from pgfadvise_loader('pgbench_accounts', 0, false, true, B'111000'); relpath | os_page_size | os_pages_free | pages_loaded | pages_unloaded ------------------+--------------+---------------+--------------+---------------- base/11874/16447 | 4096 | 408370 | 0 | 3 pgfincore --------- This function provide information about the file system cache (page cache). :: cedric=# select * from pgfincore('pgbench_accounts'); relpath | segment | os_page_size | rel_os_pages | pages_mem | group_mem | os_pages_free | databit --------------------+---------+--------------+--------------+-----------+-----------+---------------+--------- base/11874/16447 | 0 | 4096 | 262144 | 3 | 1 | 408444 | base/11874/16447.1 | 1 | 4096 | 65726 | 0 | 0 | 408444 | For the specified relation it returns: * relpath : the relation path * segment : the segment number analyzed * os_page_size : the size of one page * rel_os_pages : the total number of pages of the relation * pages_mem : the total number of relation's pages in page cache. 
(not the shared buffers from PostgreSQL but the OS cache) * group_mem : the number of groups of adjacent pages_mem * os_page_free : the number of free page in the OS page cache * databit : the varbit map of the file, because of its size it is useless to output Use pgfincore('pgbench_accounts',true) to activate it. DEBUG ===== You can debug the PgFincore with the following error level: *DEBUG1* and *DEBUG5*. For example:: set client_min_messages TO debug1; -- debug5 is only usefull to trace each block LIMITATIONS =========== * PgFincore needs mincore() and POSIX_FADVISE. * PgFincore has a limited mode when POSIX_FADVISE is not provided by the platform. * PgFincore needs PostgreSQL >= 8.3 * PgFincore does not work on windows. SEE ALSO ======== 2ndQuadrant, PostgreSQL Expertise, developement, training and 24x7 support: http://2ndQuadrant.fr pgfincore-1.1.2/TODO000066400000000000000000000003501225162325000141530ustar00rootroot00000000000000* [sql] average contigous block or stats like that (what part of the file is in cache) * [code] split mmaping in shorter segment (say 64Mb) per sugestion from Andres Freund * graph * [debian] package checks and do make installcheck pgfincore-1.1.2/debian/000077500000000000000000000000001225162325000147075ustar00rootroot00000000000000pgfincore-1.1.2/debian/changelog000066400000000000000000000015731225162325000165670ustar00rootroot00000000000000pgfincore (1.1.2-1) UNRELEASED; urgency=low * New upstream release with PostgreSQL 9.3 support. (Closes: #725570) * Add autopkgtest support. * Add watch file looking for releases on github. 
-- Christoph Berg Tue, 10 Dec 2013 14:53:20 +0100 pgfincore (1.1.1-1) unstable; urgency=low * New upstream release -- Cédric Villemain Fri, 02 Dec 2011 22:48:27 +0100 pgfincore (1.1-1) unstable; urgency=low * New upstream release * Clean packaging for 9.1 (Closes: #639460) -- Dimitri Fontaine Mon, 05 Sep 2011 12:56:05 +0200 pgfincore (1.0-1) unstable; urgency=low * New upstream release -- Dimitri Fontaine Wed, 27 Jul 2011 16:21:48 +0200 pgfincore (0.4-1) unstable; urgency=low * Initial packaging -- Dimitri Fontaine Tue, 30 Nov 2010 15:27:25 +0100 pgfincore-1.1.2/debian/compat000066400000000000000000000000021225162325000161050ustar00rootroot000000000000008 pgfincore-1.1.2/debian/control.in000066400000000000000000000016501225162325000167210ustar00rootroot00000000000000Source: pgfincore Section: database Priority: extra Maintainer: Cédric Villemain Uploaders: Dimitri Fontaine , Christoph Berg Build-Depends: debhelper (>= 8.9.0), postgresql-server-dev-all (>= 118) Standards-Version: 3.9.5 Vcs-Git: git://git.postgresql.org/git/pgfincore.git Vcs-browser: http://git.postgresql.org/gitweb/?p=pgfincore.git Homepage: http://villemain.org/projects/pgfincore XS-Testsuite: autopkgtest Package: postgresql-PGVERSION-pgfincore Architecture: any Depends: ${shlibs:Depends}, ${misc:Depends}, postgresql-PGVERSION Description: set of PostgreSQL functions to manage blocks in memory Those functions let you know which and how many disk block from a relation are in the page cache of the operating system, and eventually write the result to a file. Then using this file, it is possible to restore the page cache state for each block of the relation. 
pgfincore-1.1.2/debian/copyright000066400000000000000000000034471225162325000166520ustar00rootroot00000000000000Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ Upstream-Name: pgfincore Source: http://git.postgresql.org/gitweb/?p=pgfincore.git Files: * Copyright: 2009-2013 Cédric Villemain License: BSD-3-Clause Files: debian/* Copyright: 2013 Cédric Villemain License: BSD-3-Clause License: BSD-3-Clause Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the University nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. . THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
pgfincore-1.1.2/debian/pgversions000066400000000000000000000000301225162325000170220ustar00rootroot000000000000008.3 8.4 9.0 9.1 9.2 9.3 pgfincore-1.1.2/debian/rules000077500000000000000000000014631225162325000157730ustar00rootroot00000000000000#!/usr/bin/make -f # -*- makefile -*- EXTNAME = $(shell dpkg-parsechangelog | awk -F '[:-]' '/^Source:/ { print substr($$2, 2) }') PKGVERS = $(shell dpkg-parsechangelog | awk -F '[:-]' '/^Version:/ { print substr($$2, 2) }') SRCDIR = $(CURDIR) TARGET = build-$(EXTNAME)-%v include /usr/share/postgresql-common/pgxs_debian_control.mk clean: debian/control .PHONY: debian/control override_dh_auto_clean: +pg_buildext clean $(SRCDIR) $(TARGET) $(MAKE) clean override_dh_auto_build: # build all supported version +pg_buildext build $(SRCDIR) $(TARGET) override_dh_auto_install: # then install each of them +pg_buildext install $(SRCDIR) $(TARGET) postgresql-%v-$(EXTNAME) orig: clean git archive --prefix=$(EXTNAME)-$(PKGVERS)/ -o ../$(EXTNAME)_$(PKGVERS).orig.tar.gz HEAD %: dh $@ .PHONY: debian/control pgfincore-1.1.2/debian/source/000077500000000000000000000000001225162325000162075ustar00rootroot00000000000000pgfincore-1.1.2/debian/source/format000066400000000000000000000000141225162325000174150ustar00rootroot000000000000003.0 (quilt) pgfincore-1.1.2/debian/tests/000077500000000000000000000000001225162325000160515ustar00rootroot00000000000000pgfincore-1.1.2/debian/tests/control000066400000000000000000000001401225162325000174470ustar00rootroot00000000000000Depends: @, postgresql-server-dev-all Tests: installcheck Restrictions: needs-root allow-stderr pgfincore-1.1.2/debian/tests/installcheck000077500000000000000000000004251225162325000204440ustar00rootroot00000000000000#!/bin/sh set -e for v in $(pg_buildext supported-versions); do if ! 
pg_virtualenv -v $v \ make installcheck PG_CONFIG=/usr/lib/postgresql/$v/bin/pg_config; then if [ -r regression.diffs ]; then echo "**** regression.diffs ****" cat regression.diffs fi fi done pgfincore-1.1.2/debian/watch000066400000000000000000000001351225162325000157370ustar00rootroot00000000000000version=3 https://github.com/klando/pgfincore/releases /klando/pgfincore/archive/(.*).tar.gz pgfincore-1.1.2/examples/000077500000000000000000000000001225162325000153035ustar00rootroot00000000000000pgfincore-1.1.2/examples/buffercache_pgfincore.sql000066400000000000000000000020711225162325000223150ustar00rootroot00000000000000with my_table as ( select oid , relfilenode , relname from pg_class where relname = 'pgbench_accounts' ) , t as ( select generate_series(1, relpages) as g from my_table join pg_class using (relname) ) , buf as ( select relblocknumber * 2 as bn -- Pgfincore use filesystem block size , usagecount as c , isdirty as d from my_table join pg_buffercache using (relfilenode) where relforknumber = 0 ) , pgf as ( select (row_number() over (partition by c)) - 1 as bn -- pascal vs C , c , NULL as d from (select unnest( string_to_array( (pgfincore(my_table.oid, true)).databit::text, NULL ) ) as c from my_table ) g ) , fb as ( select pgf.bn as file_block_number , buf.c as pgcache , buf.d as pgdirty , pgf.c as oscache , pgf.d as osdirty from buf right join pgf using (bn) order by 1, 2, 3 ), res as ( select * from fb ) select row_to_json(res) -- use "res" CTE if no JSON datatype (pg < 9.2) from res; pgfincore-1.1.2/expected/000077500000000000000000000000001225162325000152665ustar00rootroot00000000000000pgfincore-1.1.2/expected/pgfincore.ext.out000066400000000000000000000032261225162325000205750ustar00rootroot00000000000000CREATE EXTENSION pgfincore; -- -- test SYSCONF -- select true from pgsysconf(); bool ------ t (1 row) select true from pgsysconf_pretty(); bool ------ t (1 row) -- -- make a temp table to use below -- CREATE TEMP TABLE test AS SELECT 
generate_series(1,256) as a; -- -- this is not perfect testing but it is hard to predict what the OS will do -- for *sure* -- -- -- test fadvise_loader -- select true from pgfadvise_loader('test', 0, true, true, B'1010'); bool ------ t (1 row) select true from pgfadvise_loader('test', 0, true, false, B'1010'); bool ------ t (1 row) select true from pgfadvise_loader('test', 0, false, true, B'1010'); bool ------ t (1 row) select true from pgfadvise_loader('test', 0, false, false, B'1010'); bool ------ t (1 row) -- must not fail on empty databit input select true from pgfadvise_loader('test', 0, false, false, B''); bool ------ t (1 row) -- ERROR on NULL databit input select true from pgfadvise_loader('test', 0, false, false, NULL); ERROR: pgfadvise_loader: databit argument shouldn't be NULL CONTEXT: SQL function "pgfadvise_loader" statement 1 -- -- test pgfincore -- select true from pgfincore('test', true); bool ------ t (1 row) select true from pgfincore('test'); bool ------ t (1 row) -- -- test DONTNEED, WILLNEED -- select true from pgfadvise_willneed('test'); bool ------ t (1 row) select true from pgfadvise_dontneed('test'); bool ------ t (1 row) -- -- test PGFADVISE flags -- select true from pgfadvise_sequential('test'); bool ------ t (1 row) select true from pgfadvise_random('test'); bool ------ t (1 row) select true from pgfadvise_normal('test'); bool ------ t (1 row) pgfincore-1.1.2/expected/pgfincore.out000066400000000000000000000033071225162325000177760ustar00rootroot00000000000000SET client_min_messages = warning; \set ECHO none RESET client_min_messages; -- -- test SYSCONF -- select true from pgsysconf(); bool ------ t (1 row) select true from pgsysconf_pretty(); bool ------ t (1 row) -- -- make a temp table to use below -- CREATE TEMP TABLE test AS SELECT generate_series(1,256) as a; -- -- this is not perfect testing but it is hard to predict what the OS will do -- for *sure* -- -- -- test fadvise_loader -- select true from pgfadvise_loader('test', 0, 
true, true, B'1010'); bool ------ t (1 row) select true from pgfadvise_loader('test', 0, true, false, B'1010'); bool ------ t (1 row) select true from pgfadvise_loader('test', 0, false, true, B'1010'); bool ------ t (1 row) select true from pgfadvise_loader('test', 0, false, false, B'1010'); bool ------ t (1 row) -- must not fail on empty databit input select true from pgfadvise_loader('test', 0, false, false, B''); bool ------ t (1 row) -- ERROR on NULL databit input select true from pgfadvise_loader('test', 0, false, false, NULL); ERROR: pgfadvise_loader: databit argument shouldn't be NULL CONTEXT: SQL function "pgfadvise_loader" statement 1 -- -- test pgfincore -- select true from pgfincore('test', true); bool ------ t (1 row) select true from pgfincore('test'); bool ------ t (1 row) -- -- test DONTNEED, WILLNEED -- select true from pgfadvise_willneed('test'); bool ------ t (1 row) select true from pgfadvise_dontneed('test'); bool ------ t (1 row) -- -- test PGFADVISE flags -- select true from pgfadvise_sequential('test'); bool ------ t (1 row) select true from pgfadvise_random('test'); bool ------ t (1 row) select true from pgfadvise_normal('test'); bool ------ t (1 row) pgfincore-1.1.2/expected/pgfincore_2.out000066400000000000000000000040771225162325000202240ustar00rootroot00000000000000SET client_min_messages = warning; \set ECHO none RESET client_min_messages; -- -- test SYSCONF -- select true from pgsysconf(); bool ------ t (1 row) select true from pgsysconf_pretty(); bool ------ t (1 row) -- -- make a temp table to use below -- CREATE TEMP TABLE test AS SELECT generate_series(1,256) as a; -- -- this is not perfect testing but it is hard to predict what the OS will do -- for *sure* -- -- -- test fadvise_loader -- select true from pgfadvise_loader('test', 0, true, true, B'1010'); ERROR: POSIX_FADVISE UNSUPPORTED on your platform CONTEXT: SQL function "pgfadvise_loader" statement 1 select true from pgfadvise_loader('test', 0, true, false, B'1010'); ERROR: 
POSIX_FADVISE UNSUPPORTED on your platform CONTEXT: SQL function "pgfadvise_loader" statement 1 select true from pgfadvise_loader('test', 0, false, true, B'1010'); ERROR: POSIX_FADVISE UNSUPPORTED on your platform CONTEXT: SQL function "pgfadvise_loader" statement 1 select true from pgfadvise_loader('test', 0, false, false, B'1010'); ERROR: POSIX_FADVISE UNSUPPORTED on your platform CONTEXT: SQL function "pgfadvise_loader" statement 1 -- -- test pgfincore -- select true from pgfincore('test', true); bool ------ t (1 row) select true from pgfincore('test'); bool ------ t (1 row) -- -- test DONTNEED, WILLNEED -- select true from pgfadvise_willneed('test'); ERROR: POSIX_FADVISE UNSUPPORTED on your platform CONTEXT: SQL function "pgfadvise_willneed" statement 1 select true from pgfadvise_dontneed('test'); ERROR: POSIX_FADVISE UNSUPPORTED on your platform CONTEXT: SQL function "pgfadvise_dontneed" statement 1 -- -- test PGFADVISE flags -- select true from pgfadvise_sequential('test'); ERROR: POSIX_FADVISE UNSUPPORTED on your platform CONTEXT: SQL function "pgfadvise_sequential" statement 1 select true from pgfadvise_random('test'); ERROR: POSIX_FADVISE UNSUPPORTED on your platform CONTEXT: SQL function "pgfadvise_random" statement 1 select true from pgfadvise_normal('test'); ERROR: POSIX_FADVISE UNSUPPORTED on your platform CONTEXT: SQL function "pgfadvise_normal" statement 1 pgfincore-1.1.2/pgfincore.c000066400000000000000000000634251225162325000156170ustar00rootroot00000000000000/* * PgFincore * This project let you see and mainpulate objects in the FS page cache * Copyright (C) 2009-2011 Cédric Villemain */ /* { POSIX stuff */ #define _XOPEN_SOURCE 600 /* fadvise */ #include /* exit, calloc, free */ #include /* stat, fstat */ #include /* size_t, mincore */ #include /* sysconf, close */ #include /* mmap, mincore */ #include /* fadvise */ /* } */ /* { PostgreSQL stuff */ #include "postgres.h" /* general Postgres declarations */ #include "access/heapam.h" /* 
relation_open */
#include "catalog/catalog.h" /* relpath */
#include "catalog/namespace.h" /* makeRangeVarFromNameList */
#include "utils/builtins.h" /* textToQualifiedNameList */
#include "utils/rel.h" /* Relation */
#include "utils/varbit.h" /* bitstring datatype */
#include "funcapi.h" /* SRF */
#include "catalog/pg_type.h" /* TEXTOID for tuple_desc */
#include "storage/fd.h" /* AllocateFile, FreeFile */

/*
 * PG_MAJOR_VERSION is PG_VERSION_NUM without its two minor digits
 * (803 for 8.3.x, 900 for 9.0.x, ...). It selects the compatibility
 * code further down.
 */
#ifdef PG_VERSION_NUM
#define PG_MAJOR_VERSION (PG_VERSION_NUM / 100)
#else
#error "Unknown postgresql version"
#endif

/* Nothing older than 8.3 is supported */
#if PG_VERSION_NUM < 80300
#error "Unsupported postgresql version"
#endif

/* Extra headers needed from 9.3 on */
#if PG_VERSION_NUM > 90299
#include "access/htup_details.h" /* 9.3 heap_form_tuple */
#include "common/relpath.h" /* 9.3 relpathbackend */
#endif

#ifdef PG_MODULE_MAGIC
PG_MODULE_MAGIC;
#endif
/* } */

/* Number of columns in the row built by each SQL-callable function */
#define PGSYSCONF_COLS 3
#define PGFADVISE_COLS 4
#define PGFADVISE_LOADER_COLS 5
#define PGFINCORE_COLS 8

/*
 * Advice codes accepted by pgfadvise(); pgfadvise_file() translates each
 * of them to the POSIX_FADV_* constant of the same name.
 */
#define PGF_WILLNEED 10
#define PGF_DONTNEED 20
#define PGF_NORMAL 30
#define PGF_SEQUENTIAL 40
#define PGF_RANDOM 50

/*
 * pgfadvise_fctx structure is needed
 * to keep track of relation path, segment number, ...
 * It is the per-query state of the pgfadvise() set-returning function.
 */
typedef struct
{
    int advice; /* the posix_fadvise advice */
    TupleDesc tupd; /* the tuple descriptor */
    Relation rel; /* the relation */
    unsigned int segcount; /* the segment current number */
    char *relationpath; /* the relation path */
} pgfadvise_fctx;

/*
 * pgfadvise structure is needed
 * to return values
 * It is filled by pgfadvise_file() for one segment file.
 */
typedef struct
{
    size_t pageSize; /* os page size */
    size_t pagesFree; /* free page cache */
    size_t filesize; /* the filesize */
} pgfadviseStruct;

/*
 * pgfloader structure is needed
 * to return values
 * It is filled by pgfadvise_loader_file() for one segment file.
 */
typedef struct
{
    size_t pageSize; /* os page size */
    size_t pagesFree; /* free page cache */
    size_t pagesLoaded; /* pages loaded */
    size_t pagesUnloaded; /* pages unloaded */
} pgfloaderStruct;

/*
 * pgfincore_fctx structure is needed
 * to keep track of relation path, segment number, ...
 * It is the per-query state of the pgfincore() set-returning function.
 */
typedef struct
{
    bool getvector; /* output varbit data ? */
    TupleDesc tupd; /* the tuple descriptor */
    Relation rel; /* the relation */
    unsigned int segcount; /* the segment current number */
    char *relationpath; /* the relation path */
} pgfincore_fctx;

/*
 * pgfincoreStruct structure is needed
 * to return values
 * It is filled by pgfincore_file() for one segment file.
 */
typedef struct
{
    size_t pageSize; /* os page size */
    size_t pagesFree; /* free page cache */
    size_t rel_os_pages; /* number of OS pages covered by the file */
    size_t pages_mem; /* number of those pages reported in memory */
    size_t group_mem; /* number of runs of contiguous in-memory pages */
    VarBit *databit; /* one bit per OS page, set when the page is in memory */
} pgfincoreStruct;

/* SQL-callable entry points and the per-file workers they rely on */
Datum pgsysconf(PG_FUNCTION_ARGS);

Datum pgfadvise(PG_FUNCTION_ARGS);
static int pgfadvise_file(char *filename, int advice, pgfadviseStruct *pgfdv);

Datum pgfadvise_loader(PG_FUNCTION_ARGS);
static int pgfadvise_loader_file(char *filename, bool willneed, bool dontneed, VarBit *databit, pgfloaderStruct *pgfloader);

Datum pgfincore(PG_FUNCTION_ARGS);
static int pgfincore_file(char *filename, pgfincoreStruct *pgfncr);

/*
 * We need to add some handler to keep the code clean
 * and support 8.3, 8.4 and 9.0
 *
 * relpathpg(rel, forkName) hides the differences between the relpath()
 * variants: it expands to the path of the first segment file of the
 * relation for the requested fork (8.3 has no fork argument).
 */
#if PG_MAJOR_VERSION == 803
/* NOTE(review): newer versions presumably get USE_POSIX_FADVISE from the server headers - confirm */
#if defined(HAVE_DECL_POSIX_FADVISE)
#define USE_POSIX_FADVISE
#endif

/* text <-> C string helpers, defined here for the 8.3 build only */
char *text_to_cstring(const text *t);
text *cstring_to_text(const char *s);
text *cstring_to_text_with_len(const char *s, int len);

/*
 * Return a palloc'd, NUL-terminated copy of the content of a text value.
 */
char *
text_to_cstring(const text *t)
{
    /* must cast away the const, unfortunately */
    text *tunpacked = pg_detoast_datum_packed((struct varlena *) t);
    int len = VARSIZE_ANY_EXHDR(tunpacked);
    char *result;

    result = (char *) palloc(len + 1);
    memcpy(result, VARDATA_ANY(tunpacked), len);
    result[len] = '\0';

    /* release the detoasted copy, if one had to be made */
    if (tunpacked != t)
        pfree(tunpacked);

    return result;
}

/*
 * Build a palloc'd text value from the first len bytes of s.
 */
text *
cstring_to_text_with_len(const char *s, int len)
{
    text *result = (text *) palloc(len + VARHDRSZ);

    SET_VARSIZE(result, len + VARHDRSZ);
    memcpy(VARDATA(result), s, len);

    return result;
}

/*
 * Build a palloc'd text value from a NUL-terminated C string.
 */
text *
cstring_to_text(const char *s)
{
    return cstring_to_text_with_len(s, strlen(s));
}

#define CStringGetTextDatum(s) PointerGetDatum(cstring_to_text(s))
/* forkName is ignored in the 8.3 variant */
#define relpathpg(rel, forkName) \
        relpath((rel)->rd_node)
#elif PG_MAJOR_VERSION == 804 || PG_MAJOR_VERSION == 900 #define relpathpg(rel, forkName) \ relpath((rel)->rd_node, forkname_to_number(text_to_cstring(forkName))) #else #define relpathpg(rel, forkName) \ relpathbackend((rel)->rd_node, (rel)->rd_backend, (forkname_to_number(text_to_cstring(forkName)))) #endif /* * pgsysconf * just output the actual system value for * _SC_PAGESIZE --> Page Size * _SC_AVPHYS_PAGES --> Free page in memory * _SC_PHYS_PAGES --> Total memory * */ PG_FUNCTION_INFO_V1(pgsysconf); Datum pgsysconf(PG_FUNCTION_ARGS) { HeapTuple tuple; TupleDesc tupdesc; Datum values[PGSYSCONF_COLS]; bool nulls[PGSYSCONF_COLS]; /* initialize nulls array to build the tuple */ memset(nulls, 0, sizeof(nulls)); /* Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "pgsysconf: return type must be a row type"); /* Page size */ values[0] = Int64GetDatum(sysconf(_SC_PAGESIZE)); /* free page in memory */ values[1] = Int64GetDatum(sysconf(_SC_AVPHYS_PAGES)); /* total memory */ values[2] = Int64GetDatum(sysconf(_SC_PHYS_PAGES)); /* Build and return the result tuple. */ tuple = heap_form_tuple(tupdesc, values, nulls); PG_RETURN_DATUM( HeapTupleGetDatum(tuple) ); } #if defined(USE_POSIX_FADVISE) /* * pgfadvise_file */ static int pgfadvise_file(char *filename, int advice, pgfadviseStruct *pgfdv) { /* * We use the AllocateFile(2) provided by PostgreSQL. We're going to * close it ourselves even if PostgreSQL close it anyway at transaction * end. 
*/ FILE *fp; int fd; struct stat st; int adviceFlag; /* * OS Page size and Free pages */ pgfdv->pageSize = sysconf(_SC_PAGESIZE); /* * Fopen and fstat file * fd will be provided to posix_fadvise * if there is no file, just return 1, it is expected to leave the SRF */ fp = AllocateFile(filename, "rb"); if (fp == NULL) return 1; fd = fileno(fp); if (fstat(fd, &st) == -1) { FreeFile(fp); elog(ERROR, "pgfadvise: Can not stat object file : %s", filename); return 2; } /* * the file size is used in the SRF to output the number of pages used by * the segment */ pgfdv->filesize = st.st_size; elog(DEBUG1, "pgfadvise: working on %s of %lld bytes", filename, (long long int) pgfdv->filesize); /* FADVISE_WILLNEED */ if (advice == PGF_WILLNEED) { adviceFlag = POSIX_FADV_WILLNEED; elog(DEBUG1, "pgfadvise: setting advice POSIX_FADV_WILLNEED"); } /* FADVISE_DONTNEED */ else if (advice == PGF_DONTNEED) { adviceFlag = POSIX_FADV_DONTNEED; elog(DEBUG1, "pgfadvise: setting advice POSIX_FADV_DONTNEED"); } /* POSIX_FADV_NORMAL */ else if (advice == PGF_NORMAL) { adviceFlag = POSIX_FADV_NORMAL; elog(DEBUG1, "pgfadvise: setting advice POSIX_FADV_NORMAL"); } /* POSIX_FADV_SEQUENTIAL */ else if (advice == PGF_SEQUENTIAL) { adviceFlag = POSIX_FADV_SEQUENTIAL; elog(DEBUG1, "pgfadvise: setting advice POSIX_FADV_SEQUENTIAL"); } /* POSIX_FADV_RANDOM */ else if (advice == PGF_RANDOM) { adviceFlag = POSIX_FADV_RANDOM; elog(DEBUG1, "pgfadvise: setting advice POSIX_FADV_RANDOM"); } else { elog(ERROR, "pgfadvise: invalid advice: %d", advice); return 2; } /* * Call posix_fadvise with the relevant advice on the file descriptor */ posix_fadvise(fd, 0, 0, adviceFlag); /* close the file */ FreeFile(fp); /* * OS things : Pages free */ pgfdv->pagesFree = sysconf(_SC_AVPHYS_PAGES); return 0; } #else static int pgfadvise_file(char *filename, int advice, pgfadviseStruct *pgfdv) { elog(ERROR, "POSIX_FADVISE UNSUPPORTED on your platform"); return 9; } #endif /* * pgfadvise is a function that handle the process to 
have a sharelock * on the relation and to walk the segments. * for each segment it call the posix_fadvise with the required flag * parameter */ PG_FUNCTION_INFO_V1(pgfadvise); Datum pgfadvise(PG_FUNCTION_ARGS) { /* SRF Stuff */ FuncCallContext *funcctx; pgfadvise_fctx *fctx; /* our structure use to return values */ pgfadviseStruct *pgfdv; /* our return value, 0 for success */ int result; /* The file we are working on */ char filename[MAXPGPATH]; /* stuff done only on the first call of the function */ if (SRF_IS_FIRSTCALL()) { MemoryContext oldcontext; Oid relOid = PG_GETARG_OID(0); text *forkName = PG_GETARG_TEXT_P(1); int advice = PG_GETARG_INT32(2); /* * Postgresql stuff to return a tuple */ TupleDesc tupdesc; /* create a function context for cross-call persistence */ funcctx = SRF_FIRSTCALL_INIT(); /* * switch to memory context appropriate for multiple function calls */ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); /* allocate memory for user context */ fctx = (pgfadvise_fctx *) palloc(sizeof(pgfadvise_fctx)); /* Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "pgfadvise: return type must be a row type"); /* provide the tuple descriptor to the fonction structure */ fctx->tupd = tupdesc; /* open the current relation, accessShareLock */ // TODO use try_relation_open instead ? 
fctx->rel = relation_open(relOid, AccessShareLock); /* we get the common part of the filename of each segment of a relation */ fctx->relationpath = relpathpg(fctx->rel, forkName); /* Here we keep track of current action in all calls */ fctx->advice = advice; /* segcount is used to get the next segment of the current relation */ fctx->segcount = 0; /* And finally we keep track of our initialization */ elog(DEBUG1, "pgfadvise: init done for %s, in fork %s", fctx->relationpath, text_to_cstring(forkName)); funcctx->user_fctx = fctx; MemoryContextSwitchTo(oldcontext); } /* After the first call, we recover our context */ funcctx = SRF_PERCALL_SETUP(); fctx = funcctx->user_fctx; /* * If we are still looking the first segment * relationpath should not be suffixed */ if (fctx->segcount == 0) snprintf(filename, MAXPGPATH, "%s", fctx->relationpath); else snprintf(filename, MAXPGPATH, "%s.%u", fctx->relationpath, fctx->segcount); elog(DEBUG1, "pgfadvise: about to work with %s, current advice : %d", filename, fctx->advice); /* * Call posix_fadvise with the advice, returning the structure */ pgfdv = (pgfadviseStruct *) palloc(sizeof(pgfadviseStruct)); result = pgfadvise_file(filename, fctx->advice, pgfdv); /* * When we have work with all segments of the current relation * We exit from the SRF * Else we build and return the tuple for this segment */ if (result) { elog(DEBUG1, "pgfadvise: closing %s", fctx->relationpath); relation_close(fctx->rel, AccessShareLock); pfree(fctx); SRF_RETURN_DONE(funcctx); } else { /* * Postgresql stuff to return a tuple */ HeapTuple tuple; Datum values[PGFADVISE_COLS]; bool nulls[PGFADVISE_COLS]; /* initialize nulls array to build the tuple */ memset(nulls, 0, sizeof(nulls)); /* prepare the number of the next segment */ fctx->segcount++; /* Filename */ values[0] = CStringGetTextDatum( filename ); /* os page size */ values[1] = Int64GetDatum( (int64) pgfdv->pageSize ); /* number of pages used by segment */ values[2] = Int64GetDatum( (int64) 
((pgfdv->filesize+pgfdv->pageSize-1)/pgfdv->pageSize) ); /* free page cache */ values[3] = Int64GetDatum( (int64) pgfdv->pagesFree ); /* Build the result tuple. */ tuple = heap_form_tuple(fctx->tupd, values, nulls); /* Ok, return results, and go for next call */ SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple)); } } #if defined(USE_POSIX_FADVISE) /* * pgfadvise_file */ static int pgfadvise_loader_file(char *filename, bool willneed, bool dontneed, VarBit *databit, pgfloaderStruct *pgfloader) { bits8 *sp; int bitlen; bits8 x; int i, k; /* * We use the AllocateFile(2) provided by PostgreSQL. We're going to * close it ourselves even if PostgreSQL close it anyway at transaction * end. */ FILE *fp; int fd; struct stat st; /* * OS things : Page size */ pgfloader->pageSize = sysconf(_SC_PAGESIZE); /* * we count the action we perform * both are theorical : we don't know if the page was or not in memory * when we call posix_fadvise */ pgfloader->pagesLoaded = 0; pgfloader->pagesUnloaded = 0; /* * Fopen and fstat file * fd will be provided to posix_fadvise * if there is no file, just return 1, it is expected to leave the SRF */ fp = AllocateFile(filename, "rb"); if (fp == NULL) return 1; fd = fileno(fp); if (fstat(fd, &st) == -1) { FreeFile(fp); elog(ERROR, "pgfadvise_loader: Can not stat object file: %s", filename); return 2; } elog(DEBUG1, "pgfadvise_loader: working on %s", filename); bitlen = VARBITLEN(databit); sp = VARBITS(databit); for (i = 0; i < bitlen - BITS_PER_BYTE; i += BITS_PER_BYTE, sp++) { x = *sp; /* Is this bit set ? 
*/ for (k = 0; k < BITS_PER_BYTE; k++) { if (IS_HIGHBIT_SET(x)) { if (willneed) { (void) posix_fadvise(fd, ((i+k) * pgfloader->pageSize), pgfloader->pageSize, POSIX_FADV_WILLNEED); pgfloader->pagesLoaded++; } } else if (dontneed) { (void) posix_fadvise(fd, ((i+k) * pgfloader->pageSize), pgfloader->pageSize, POSIX_FADV_DONTNEED); pgfloader->pagesUnloaded++; } x <<= 1; } } /* * XXX this copy/paste of code to finnish to walk the bits is not pretty */ if (i < bitlen) { /* print the last partial byte */ x = *sp; for (k = i; k < bitlen; k++) { if (IS_HIGHBIT_SET(x)) { if (willneed) { (void) posix_fadvise(fd, (k * pgfloader->pageSize), pgfloader->pageSize, POSIX_FADV_WILLNEED); pgfloader->pagesLoaded++; } } else if (dontneed) { (void) posix_fadvise(fd, (k * pgfloader->pageSize), pgfloader->pageSize, POSIX_FADV_DONTNEED); pgfloader->pagesUnloaded++; } x <<= 1; } } FreeFile(fp); /* * OS things : Pages free */ pgfloader->pagesFree = sysconf(_SC_AVPHYS_PAGES); return 0; } #else static int pgfadvise_loader_file(char *filename, bool willneed, bool dontneed, VarBit *databit, pgfloaderStruct *pgfloader) { elog(ERROR, "POSIX_FADVISE UNSUPPORTED on your platform"); return 9; } #endif /* * * pgfadv_loader to handle work with varbit map of buffer cache. 
* it is actually used for loading/unloading block to/from buffer cache * */ PG_FUNCTION_INFO_V1(pgfadvise_loader); Datum pgfadvise_loader(PG_FUNCTION_ARGS) { Oid relOid = PG_GETARG_OID(0); text *forkName = PG_GETARG_TEXT_P(1); int segmentNumber = PG_GETARG_INT32(2); bool willneed = PG_GETARG_BOOL(3); bool dontneed = PG_GETARG_BOOL(4); VarBit *databit; /* our structure use to return values */ pgfloaderStruct *pgfloader; Relation rel; char *relationpath; char filename[MAXPGPATH]; /* our return value, 0 for success */ int result; /* * Postgresql stuff to return a tuple */ HeapTuple tuple; TupleDesc tupdesc; Datum values[PGFADVISE_LOADER_COLS]; bool nulls[PGFADVISE_LOADER_COLS]; if (PG_ARGISNULL(5)) elog(ERROR, "pgfadvise_loader: databit argument shouldn't be NULL"); else databit = PG_GETARG_VARBIT_P(5); /* initialize nulls array to build the tuple */ memset(nulls, 0, sizeof(nulls)); /* Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); /* open the current relation in accessShareLock */ rel = relation_open(relOid, AccessShareLock); /* we get the common part of the filename of each segment of a relation */ relationpath = relpathpg(rel, forkName); /* * If we are looking the first segment, * relationpath should not be suffixed */ if (segmentNumber == 0) snprintf(filename, MAXPGPATH, "%s", relationpath); else snprintf(filename, MAXPGPATH, "%s.%u", relationpath, (int) segmentNumber); /* * We don't need the relation anymore * the only purpose was to get a consistent filename * (if file disappear, an error is logged) */ relation_close(rel, AccessShareLock); /* * Call pgfadvise_loader with the varbit */ pgfloader = (pgfloaderStruct *) palloc(sizeof(pgfloaderStruct)); result = pgfadvise_loader_file(filename, willneed, dontneed, databit, pgfloader); if (result != 0) elog(ERROR, "Can't read file %s, fork(%s)", filename, text_to_cstring(forkName)); /* Filename */ 
values[0] = CStringGetTextDatum( filename ); /* os page size */ values[1] = Int64GetDatum( pgfloader->pageSize ); /* free page cache */ values[2] = Int64GetDatum( pgfloader->pagesFree ); /* pages loaded */ values[3] = Int64GetDatum( pgfloader->pagesLoaded ); /* pages unloaded */ values[4] = Int64GetDatum( pgfloader->pagesUnloaded ); /* Build and return the result tuple. */ tuple = heap_form_tuple(tupdesc, values, nulls); PG_RETURN_DATUM( HeapTupleGetDatum(tuple) ); } /* * pgfincore_file handle the mmaping, mincore process (and access file, etc.) */ static int pgfincore_file(char *filename, pgfincoreStruct *pgfncr) { int flag=1; int len, bitlen; bits8 *r; bits8 x = 0; register int64 pageIndex; /* * We use the AllocateFile(2) provided by PostgreSQL. We're going to * close it ourselves even if PostgreSQL close it anyway at transaction * end. */ FILE *fp; int fd; struct stat st; void *pa = (char *) 0; unsigned char *vec = (unsigned char *) 0; /* * OS Page size */ pgfncr->pageSize = sysconf(_SC_PAGESIZE); /* * Initialize counters */ pgfncr->pages_mem = 0; pgfncr->group_mem = 0; pgfncr->rel_os_pages = 0; /* * Fopen and fstat file * fd will be provided to posix_fadvise * if there is no file, just return 1, it is expected to leave the SRF */ fp = AllocateFile(filename, "rb"); if (fp == NULL) return 1; fd = fileno(fp); if (fstat(fd, &st) == -1) { FreeFile(fp); elog(ERROR, "Can not stat object file : %s", filename); return 2; } /* * if file ok * then process */ if (st.st_size != 0) { /* number of pages in the current file */ pgfncr->rel_os_pages = (st.st_size+pgfncr->pageSize-1)/pgfncr->pageSize; /* TODO We need to split mmap size to be sure (?) to be able to mmap */ pa = mmap(NULL, st.st_size, PROT_NONE, MAP_SHARED, fd, 0); if (pa == MAP_FAILED) { FreeFile(fp); elog(ERROR, "Can not mmap object file : %s, errno = %i,%s\nThis error can happen if there is not enought space in memory to do the projection. 
Please mail cedric@villemain.org with '[pgfincore] ENOMEM' as subject.", filename, errno, strerror(errno)); return 3; } /* Prepare our vector containing all blocks information */ vec = calloc(1, (st.st_size+pgfncr->pageSize-1)/pgfncr->pageSize); if ((void *)0 == vec) { munmap(pa, st.st_size); FreeFile(fp); elog(ERROR, "Can not calloc object file : %s", filename); return 4; } /* Affect vec with mincore */ if (mincore(pa, st.st_size, vec) != 0) { free(vec); munmap(pa, st.st_size); FreeFile(fp); elog(ERROR, "mincore(%p, %lld, %p): %s\n", pa, (long long int)st.st_size, vec, strerror(errno)); return 5; } /* * prepare the bit string */ bitlen = (st.st_size+pgfncr->pageSize-1)/pgfncr->pageSize; len = VARBITTOTALLEN(bitlen); /* * set to 0 so that *r is always initialised and string is zero-padded * XXX: do we need to free that ? */ pgfncr->databit = (VarBit *) palloc0(len); SET_VARSIZE(pgfncr->databit, len); VARBITLEN(pgfncr->databit) = bitlen; r = VARBITS(pgfncr->databit); x = HIGHBIT; /* handle the results */ for (pageIndex = 0; pageIndex <= pgfncr->rel_os_pages; pageIndex++) { // block in memory if (vec[pageIndex] & 1) { pgfncr->pages_mem++; *r |= x; elog (DEBUG5, "in memory blocks : %lld / %lld", (long long int) pageIndex, (long long int) pgfncr->rel_os_pages); /* we flag to detect contigous blocks in the same state */ if (flag) pgfncr->group_mem++; flag = 0; } else flag=1; x >>= 1; if (x == 0) { x = HIGHBIT; r++; } } } elog(DEBUG1, "pgfincore %s: %lld of %lld block in linux cache, %lld groups", filename, (long long int) pgfncr->pages_mem, (long long int) pgfncr->rel_os_pages, (long long int) pgfncr->group_mem); /* * free and close */ free(vec); munmap(pa, st.st_size); FreeFile(fp); /* * OS things : Pages free */ pgfncr->pagesFree = sysconf(_SC_AVPHYS_PAGES); return 0; } /* * pgfincore is a function that handle the process to have a sharelock * on the relation and to walk the segments. 
* for each segment it call the appropriate function depending on 'action' * parameter */ PG_FUNCTION_INFO_V1(pgfincore); Datum pgfincore(PG_FUNCTION_ARGS) { /* SRF Stuff */ FuncCallContext *funcctx; pgfincore_fctx *fctx; /* our structure use to return values */ pgfincoreStruct *pgfncr; /* our return value, 0 for success */ int result; /* The file we are working on */ char filename[MAXPGPATH]; /* stuff done only on the first call of the function */ if (SRF_IS_FIRSTCALL()) { MemoryContext oldcontext; Oid relOid = PG_GETARG_OID(0); text *forkName = PG_GETARG_TEXT_P(1); bool getvector = PG_GETARG_BOOL(2); /* * Postgresql stuff to return a tuple */ TupleDesc tupdesc; /* create a function context for cross-call persistence */ funcctx = SRF_FIRSTCALL_INIT(); /* * switch to memory context appropriate for multiple function calls */ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); /* allocate memory for user context */ fctx = (pgfincore_fctx *) palloc(sizeof(pgfincore_fctx)); /* Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "pgfadvise: return type must be a row type"); /* provide the tuple descriptor to the fonction structure */ fctx->tupd = tupdesc; /* are we going to grab and output the varbit data (can be large) */ fctx->getvector = getvector; /* open the current relation, accessShareLock */ // TODO use try_relation_open instead ? 
fctx->rel = relation_open(relOid, AccessShareLock); /* we get the common part of the filename of each segment of a relation */ fctx->relationpath = relpathpg(fctx->rel, forkName); /* segcount is used to get the next segment of the current relation */ fctx->segcount = 0; /* And finally we keep track of our initialization */ elog(DEBUG1, "pgfincore: init done for %s, in fork %s", fctx->relationpath, text_to_cstring(forkName)); funcctx->user_fctx = fctx; MemoryContextSwitchTo(oldcontext); } /* After the first call, we recover our context */ funcctx = SRF_PERCALL_SETUP(); fctx = funcctx->user_fctx; /* * If we are still looking the first segment * relationpath should not be suffixed */ if (fctx->segcount == 0) snprintf(filename, MAXPGPATH, "%s", fctx->relationpath); else snprintf(filename, MAXPGPATH, "%s.%u", fctx->relationpath, fctx->segcount); elog(DEBUG1, "pgfincore: about to work with %s", filename); /* * Call pgfincore with the advice, returning the structure */ pgfncr = (pgfincoreStruct *) palloc(sizeof(pgfincoreStruct)); result = pgfincore_file(filename, pgfncr); /* * When we have work with all segment of the current relation, test success * We exit from the SRF */ if (result) { elog(DEBUG1, "pgfincore: closing %s", fctx->relationpath); relation_close(fctx->rel, AccessShareLock); pfree(fctx); SRF_RETURN_DONE(funcctx); } else { /* * Postgresql stuff to return a tuple */ HeapTuple tuple; Datum values[PGFINCORE_COLS]; bool nulls[PGFINCORE_COLS]; /* initialize nulls array to build the tuple */ memset(nulls, 0, sizeof(nulls)); /* Filename */ values[0] = CStringGetTextDatum(filename); /* Segment Number */ values[1] = Int32GetDatum(fctx->segcount); /* os page size */ values[2] = Int64GetDatum(pgfncr->pageSize); /* number of pages used by segment */ values[3] = Int64GetDatum(pgfncr->rel_os_pages); /* number of pages in OS cache */ values[4] = Int64GetDatum(pgfncr->pages_mem); /* number of group of contigous page in os cache */ values[5] = 
Int64GetDatum(pgfncr->group_mem); /* free page cache */ values[6] = Int64GetDatum(pgfncr->pagesFree); /* the map of the file with bit set for in os cache page */ if (fctx->getvector && pgfncr->rel_os_pages) { values[7] = VarBitPGetDatum(pgfncr->databit); } else { nulls[7] = true; values[7] = (Datum) NULL; } /* Build the result tuple. */ tuple = heap_form_tuple(fctx->tupd, values, nulls); /* prepare the number of the next segment */ fctx->segcount++; /* Ok, return results, and go for next call */ SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple)); } } pgfincore-1.1.2/pgfincore.control.in000066400000000000000000000002331225162325000174460ustar00rootroot00000000000000# EXTENSION extension comment = 'EXTCOMMENT' default_version = 'EXTVERSION' module_pathname = '$libdir/EXTENSION' directory = EXTENSION relocatable = true pgfincore-1.1.2/pgfincore.sql000066400000000000000000000074331225162325000161710ustar00rootroot00000000000000 -- -- SYSCONF -- CREATE OR REPLACE FUNCTION pgsysconf(OUT os_page_size bigint, OUT os_pages_free bigint, OUT os_total_pages bigint) RETURNS record AS '$libdir/pgfincore' LANGUAGE C; CREATE OR REPLACE FUNCTION pgsysconf_pretty(OUT os_page_size text, OUT os_pages_free text, OUT os_total_pages text) RETURNS record AS ' select pg_size_pretty(os_page_size) as os_page_size, pg_size_pretty(os_pages_free * os_page_size) as os_pages_free, pg_size_pretty(os_total_pages * os_page_size) as os_total_pages from pgsysconf()' LANGUAGE SQL; -- -- PGFADVISE -- CREATE OR REPLACE FUNCTION pgfadvise(IN regclass, IN text, IN int, OUT relpath text, OUT os_page_size bigint, OUT rel_os_pages bigint, OUT os_pages_free bigint) RETURNS setof record AS '$libdir/pgfincore' LANGUAGE C; CREATE OR REPLACE FUNCTION pgfadvise_willneed(IN regclass, OUT relpath text, OUT os_page_size bigint, OUT rel_os_pages bigint, OUT os_pages_free bigint) RETURNS setof record AS 'SELECT pgfadvise($1, ''main'', 10)' LANGUAGE SQL; CREATE OR REPLACE FUNCTION pgfadvise_dontneed(IN regclass, OUT 
relpath text, OUT os_page_size bigint, OUT rel_os_pages bigint, OUT os_pages_free bigint) RETURNS setof record AS 'SELECT pgfadvise($1, ''main'', 20)' LANGUAGE SQL; CREATE OR REPLACE FUNCTION pgfadvise_normal(IN regclass, OUT relpath text, OUT os_page_size bigint, OUT rel_os_pages bigint, OUT os_pages_free bigint) RETURNS setof record AS 'SELECT pgfadvise($1, ''main'', 30)' LANGUAGE SQL; CREATE OR REPLACE FUNCTION pgfadvise_sequential(IN regclass, OUT relpath text, OUT os_page_size bigint, OUT rel_os_pages bigint, OUT os_pages_free bigint) RETURNS setof record AS 'SELECT pgfadvise($1, ''main'', 40)' LANGUAGE SQL; CREATE OR REPLACE FUNCTION pgfadvise_random(IN regclass, OUT relpath text, OUT os_page_size bigint, OUT rel_os_pages bigint, OUT os_pages_free bigint) RETURNS setof record AS 'SELECT pgfadvise($1, ''main'', 50)' LANGUAGE SQL; -- -- PGFADVISE_LOADER -- CREATE OR REPLACE FUNCTION pgfadvise_loader(IN regclass, IN text, IN int, IN bool, IN bool, IN varbit, OUT relpath text, OUT os_page_size bigint, OUT os_pages_free bigint, OUT pages_loaded bigint, OUT pages_unloaded bigint) RETURNS setof record AS '$libdir/pgfincore' LANGUAGE C; CREATE OR REPLACE FUNCTION pgfadvise_loader(IN regclass, IN int, IN bool, IN bool, IN varbit, OUT relpath text, OUT os_page_size bigint, OUT os_pages_free bigint, OUT pages_loaded bigint, OUT pages_unloaded bigint) RETURNS setof record AS 'SELECT pgfadvise_loader($1, ''main'', $2, $3, $4, $5)' LANGUAGE SQL; -- -- PGFINCORE -- CREATE OR REPLACE FUNCTION pgfincore(IN regclass, IN text, IN bool, OUT relpath text, OUT segment int, OUT os_page_size bigint, OUT rel_os_pages bigint, OUT pages_mem bigint, OUT group_mem bigint, OUT os_pages_free bigint, OUT databit varbit) RETURNS setof record AS '$libdir/pgfincore' LANGUAGE C; CREATE OR REPLACE FUNCTION pgfincore(IN regclass, IN bool, OUT relpath text, OUT segment int, OUT os_page_size bigint, OUT rel_os_pages bigint, OUT pages_mem bigint, OUT group_mem bigint, OUT os_pages_free bigint, OUT 
databit varbit) RETURNS setof record AS 'SELECT * from pgfincore($1, ''main'', $2)' LANGUAGE SQL; CREATE OR REPLACE FUNCTION pgfincore(IN regclass, OUT relpath text, OUT segment int, OUT os_page_size bigint, OUT rel_os_pages bigint, OUT pages_mem bigint, OUT group_mem bigint, OUT os_pages_free bigint, OUT databit varbit) RETURNS setof record AS 'SELECT * from pgfincore($1, ''main'', false)' LANGUAGE SQL; pgfincore-1.1.2/sql/000077500000000000000000000000001225162325000142645ustar00rootroot00000000000000pgfincore-1.1.2/sql/pgfincore.ext.sql000066400000000000000000000022771225162325000175700ustar00rootroot00000000000000CREATE EXTENSION pgfincore; -- -- test SYSCONF -- select true from pgsysconf(); select true from pgsysconf_pretty(); -- -- make a temp table to use below -- CREATE TEMP TABLE test AS SELECT generate_series(1,256) as a; -- -- this is not perfect testing but it is hard to predict what the OS will do -- for *sure* -- -- -- test fadvise_loader -- select true from pgfadvise_loader('test', 0, true, true, B'1010'); select true from pgfadvise_loader('test', 0, true, false, B'1010'); select true from pgfadvise_loader('test', 0, false, true, B'1010'); select true from pgfadvise_loader('test', 0, false, false, B'1010'); -- must not fail on empty databit input select true from pgfadvise_loader('test', 0, false, false, B''); -- ERROR on NULL databit input select true from pgfadvise_loader('test', 0, false, false, NULL); -- -- test pgfincore -- select true from pgfincore('test', true); select true from pgfincore('test'); -- -- test DONTNEED, WILLNEED -- select true from pgfadvise_willneed('test'); select true from pgfadvise_dontneed('test'); -- -- test PGFADVISE flags -- select true from pgfadvise_sequential('test'); select true from pgfadvise_random('test'); select true from pgfadvise_normal('test'); pgfincore-1.1.2/sql/pgfincore.sql000066400000000000000000000024171225162325000167650ustar00rootroot00000000000000SET client_min_messages = warning; \set ECHO none \i 
pgfincore.sql \set ECHO all RESET client_min_messages; -- -- test SYSCONF -- select true from pgsysconf(); select true from pgsysconf_pretty(); -- -- make a temp table to use below -- CREATE TEMP TABLE test AS SELECT generate_series(1,256) as a; -- -- this is not perfect testing but it is hard to predict what the OS will do -- for *sure* -- -- -- test fadvise_loader -- select true from pgfadvise_loader('test', 0, true, true, B'1010'); select true from pgfadvise_loader('test', 0, true, false, B'1010'); select true from pgfadvise_loader('test', 0, false, true, B'1010'); select true from pgfadvise_loader('test', 0, false, false, B'1010'); -- must not fail on empty databit input select true from pgfadvise_loader('test', 0, false, false, B''); -- ERROR on NULL databit input select true from pgfadvise_loader('test', 0, false, false, NULL); -- -- test pgfincore -- select true from pgfincore('test', true); select true from pgfincore('test'); -- -- test DONTNEED, WILLNEED -- select true from pgfadvise_willneed('test'); select true from pgfadvise_dontneed('test'); -- -- test PGFADVISE flags -- select true from pgfadvise_sequential('test'); select true from pgfadvise_random('test'); select true from pgfadvise_normal('test'); pgfincore-1.1.2/uninstall_pgfincore.sql000066400000000000000000000011211225162325000202460ustar00rootroot00000000000000 DROP FUNCTION pgsysconf_pretty(); DROP FUNCTION pgsysconf(); DROP FUNCTION pgfadvise_willneed(regclass); DROP FUNCTION pgfadvise_dontneed(regclass); DROP FUNCTION pgfadvise_normal(regclass); DROP FUNCTION pgfadvise_sequential(regclass); DROP FUNCTION pgfadvise_random(regclass); DROP FUNCTION pgfadvise(regclass, text, int); DROP FUNCTION pgfadvise_loader(regclass, text, int, bool, bool, varbit); DROP FUNCTION pgfadvise_loader(regclass, int, bool, bool, varbit); DROP FUNCTION pgfincore(regclass); DROP FUNCTION pgfincore(regclass, bool); DROP FUNCTION pgfincore(regclass, text, bool);