pax_global_header00006660000000000000000000000064145763370770014535gustar00rootroot0000000000000052 comment=753be8ee758c775af4c64e7794f8083a5d3960a5 pgfaceting-0.2.0/000077500000000000000000000000001457633707700136435ustar00rootroot00000000000000pgfaceting-0.2.0/.gitignore000066400000000000000000000000101457633707700156220ustar00rootroot00000000000000results pgfaceting-0.2.0/LICENSE000066400000000000000000000026571457633707700146620ustar00rootroot00000000000000Copyright 2022 Ants Aasma Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. pgfaceting-0.2.0/META.json000066400000000000000000000016441457633707700152710ustar00rootroot00000000000000{ "name": "pgfaceting", "abstract": "Quickly calculate facet counts using inverted index built with roaring bitmaps.", "description": "", "version": "0.2.0", "maintainer": [ "Ants Aasma " ], "license": "bsd", "prereqs": { "runtime": { "requires": { "PostgreSQL": "11.0" } } }, "provides": { "pgfaceting": { "file": "sql/pgfaceting--0.1.0.sql", "docfile" : "README.md", "version": "0.2.0" } }, "resources": { "repository": { "url": "https://github.com/cybertec-postgresql/pgfaceting.git", "web": "https://github.com/cybertec-postgresql/pgfaceting", "type": "git" } }, "tags" : ["count", "aggregate", "performance", "bitmap"], "meta-spec": { "version": "1.0.0", "url": "http://pgxn.org/meta/spec.txt" }, "release_status" : "unstable" } pgfaceting-0.2.0/Makefile000066400000000000000000000004031457633707700153000ustar00rootroot00000000000000EXTENSION = pgfaceting DATA = sql/pgfaceting--0.2.0.sql TESTS = $(wildcard test/sql/*.sql) REGRESS = $(patsubst test/sql/%.sql,%,$(TESTS)) REGRESS_OPTS = --inputdir=test PG_CONFIG = pg_config PGXS := $(shell $(PG_CONFIG) --pgxs) include $(PGXS) pgfaceting-0.2.0/README.md000066400000000000000000000136651457633707700151350ustar00rootroot00000000000000# pgfaceting PostgreSQL extension to quickly calculate facet counts using inverted index built with [roaring bitmaps](https://roaringbitmap.org/). Requires [pg_roaringbitmap](https://github.com/ChenHuajun/pg_roaringbitmap) to be installed. Faceting means counting number occurrences of each value in a result set for a set of attributes. Typical example of faceting is a web shop where you can see how many items are remaining after filtering your search by red, green or blue, and how many when filtering by size small, medium or large. Work on this project has been sponsored by [Xenit](https://xenit.eu/). ## Build and install make install make installcheck psql -c "create extension roaringbitmap" -c "create extension pgfaceting" ## Usage pgfaceting creates and maintains two extra tables for your main table. `tbl_facets` contains for each facet and value combination a list of id values for rows containing that combination. The list is stored as a roaring bitmap for quick intersection and cardinality operations. Because updating this list is a heavy operation any changes to the main table get stored in `tbl_facets_deltas` as a combination of facet, value, id and +1 or -1 depending on the kind of update. A periodic maintenance job is responsible for merging deltas into the main facets table. Currently only 32bit integer id columns are supported. When pg\_roaringbitmap adds support for 64bit bitmaps then int8 and possibly ctid could be supported. ### Adding faceting to a table SELECT faceting.add_faceting_to_table( 'documents', key => 'id', facets => array[ faceting.datetrunc_facet('created', 'month'), faceting.datetrunc_facet('finished', 'month'), faceting.plain_facet('category_id'), faceting.plain_facet('type'), faceting.bucket_facet('size', buckets => array[0,1000,5000,10000,50000,100000,500000]) ] ); The `add_faceting_to_table()` adds the facets tables and populates the contents. It takes an array of facets to extract from each row. * `plain_facet(col name)` - Takes the column value as is as the facet value. * `datetrunc_facet(col name, precision text)` - Applies a date\_trunc function on a column to get the facet value. Useful for timebucketing (yearly, monthly, etc.) * `bucket_facet(col name, buckets anyarray)` - Assigns a continuous variable (price, weight, etc.) to a set of buckets and stores the index of the chosen bucket as the facet value. For merging changes create a periodic job that runs: CALL faceting.run_maintenance(); This will run delta merging on all faceted tables. There is also a function for maintaining a single table: SELECT faceting.merge_deltas('documents'::regclass); ### Querying facets Getting top 10 values for each kind of facet: SELECT * FROM faceting.top_values('documents'::regclass, n => 10); We can also filter by some facets and get the results of other facets: SELECT * FROM faceting.count_results('documents'::regclass, filters => array[row('category_id', '24'), row('type', 'image/jpeg')]::faceting.facet_filter[]); For advanced usage the inverted index tables can be accessed directly. WITH lookup AS ( SELECT id >> 20 AS chunk_id, rb_build_agg(id) postinglist FROM documents WHERE ... ) SELECT facet_id, facet_value, sum(rb_and_cardinality(flt.postinglist, fct.postinglist)) FROM lookup flt JOIN documents_facets USING (chunk_id) GROUP BY 1, 2; ### How fast is it Calculating facets for 61% of rows in 100M row table: -- 24 vcore parallel seq scan postgres=# SELECT facet_name, count(distinct facet_value), sum(cardinality) FROM (SELECT facet_name, facet_value, COUNT(*) cardinality FROM test2.documents d, LATERAL (VALUES ('created', date_trunc('month', created)::text), ('finished', date_trunc('month', finished)::text), ('type', type::text), ('size', width_bucket(size, array[0,1000,5000,10000,50000,100000,500000])::text) ) t(facet_name, facet_value) WHERE category_id = 24 GROUP BY 1, 2) count_results GROUP BY 1; facet_name | count | sum ------------+-------+---------- created | 154 | 60812252 finished | 154 | 60812252 size | 7 | 60812252 type | 8 | 60812252 (4 rows) Time: 18440.061 ms (00:18.440) -- Single core only postgres=# SET max_parallel_workers_per_gather = 0; SET Time: 0.206 ms postgres=# SELECT facet_name, count(distinct facet_value), sum(cardinality) FROM (SELECT facet_name, facet_value, COUNT(*) cardinality FROM test2.documents d, LATERAL (VALUES ('created', date_trunc('month', created)::text), ('finished', date_trunc('month', finished)::text), ('type', type::text), ('size', width_bucket(size, array[0,1000,5000,10000,50000,100000,500000])::text) ) t(facet_name, facet_value) WHERE category_id = 24 GROUP BY 1, 2) count_results GROUP BY 1; facet_name | count | sum ------------+-------+---------- created | 154 | 60812252 finished | 154 | 60812252 size | 7 | 60812252 type | 8 | 60812252 (4 rows) Time: 222019.758 ms (03:42.020) -- Using facets index postgres=# SELECT facet_name, count(distinct facet_value), sum(cardinality) FROM faceting.count_results('documents'::regclass, filters => array[row('category_id', 24)]::faceting.facet_filter[]) GROUP BY 1; facet_name | count | sum ------------+-------+---------- created | 154 | 60812252 finished | 154 | 60812252 size | 7 | 60812252 type | 8 | 60812252 (4 rows) Time: 155.228 ms pgfaceting-0.2.0/pgfaceting.control000066400000000000000000000002761457633707700173610ustar00rootroot00000000000000# faceting extension comment = 'fast faceting queries using an inverted index' default_version = '0.2.0' relocatable = false schema = 'faceting' requires = 'roaringbitmap' superuser = false pgfaceting-0.2.0/sql/000077500000000000000000000000001457633707700144425ustar00rootroot00000000000000pgfaceting-0.2.0/sql/pgfaceting--0.2.0.sql000066400000000000000000000640761457633707700200170ustar00rootroot00000000000000CREATE FUNCTION faceting._identifier_append(ident text, append text) RETURNS text LANGUAGE SQL AS $$ SELECT CASE WHEN right(ident, 1) = '"' THEN substr(ident, 1, length(ident) - 1) || append || '"' ELSE ident || append END; $$; CREATE FUNCTION faceting._name_only(ident text) RETURNS text LANGUAGE SQL AS $$ SELECT regexp_replace(ident, '^([^"]*|"([^\"]|\\")*")\.', ''); $$; CREATE FUNCTION faceting._qualified(schemaname text, tablename text) RETURNS text LANGUAGE SQL AS $$ SELECT format('%s.%s', quote_ident(schemaname), quote_ident(tablename)); $$; CREATE TABLE faceting.faceted_table ( table_id oid primary key, schemaname text, tablename text, facets_table text, delta_table text, key name, key_type text, chunk_bits int ); SELECT pg_catalog.pg_extension_config_dump('faceting.faceted_table', ''); CREATE TABLE faceting.facet_definition ( table_id oid NOT NULL REFERENCES faceted_table (table_id), facet_id int NOT NULL, facet_name text NOT NULL, facet_type text NOT NULL, base_column name, params jsonb, is_multi bool not null, supports_delta bool not null, PRIMARY KEY (table_id, facet_id) ); CREATE UNIQUE INDEX facet_definition_uniq_name ON faceting.facet_definition (table_id, facet_name); SELECT pg_catalog.pg_extension_config_dump('faceting.facet_definition', ''); CREATE FUNCTION faceting.add_faceting_to_table(p_table regclass, key name, facets facet_definition[], chunk_bits int = 20, keep_deltas bool = true, populate bool = true) RETURNS void LANGUAGE plpgsql AS $$ DECLARE schemaname text; tablename text; facet_tablename text; delta_tablename text; v_table_id int; v_facet_defs faceting.facet_definition[]; key_type text; BEGIN SELECT relname, nspname INTO tablename, schemaname FROM pg_class c JOIN pg_namespace n ON relnamespace = n.oid WHERE c.oid = p_table::oid; IF NOT FOUND THEN RAISE EXCEPTION 'Cannot find table %', p_table; END IF; -- Can't make us of highest bit of int4 because don't want to dealt with negative values IF chunk_bits NOT BETWEEN 1 AND 31 THEN RAISE EXCEPTION 'Invalid number of bits per chunk: %', chunk_bits; END IF; -- Default chunking size is 1Mi. /* TODO: namespace qualify to be in the same schema as parent table */ facet_tablename := faceting._identifier_append(tablename, '_facets'); delta_tablename := faceting._identifier_append(tablename, '_facets_deltas'); SELECT t.typname INTO key_type FROM pg_attribute a JOIN pg_type t ON t.oid = a.atttypid WHERE attrelid = p_table::oid AND attname = key; IF NOT FOUND THEN RAISE EXCEPTION 'Key column % not found in %s.%s', key, schemaname, tablename; ELSIF key_type NOT IN ('int2', 'int4', 'int8') THEN RAISE EXCEPTION 'Key column type % is not supported.', key_type; END IF; INSERT INTO faceting.faceted_table (table_id, schemaname, tablename, facets_table, delta_table, key, key_type, chunk_bits) VALUES (p_table::oid, schemaname, tablename, facet_tablename, CASE WHEN keep_deltas THEN delta_tablename END, key, key_type, chunk_bits) RETURNING table_id INTO v_table_id; WITH stored_definitions AS ( INSERT INTO faceting.facet_definition (table_id, facet_id, facet_name, facet_type, base_column, params, is_multi, supports_delta) SELECT v_table_id, assigned_id, facet_name, facet_type, base_column, params, is_multi, supports_delta FROM UNNEST(facets) WITH ORDINALITY AS x(_, _, facet_name, facet_type, base_column, params, is_multi, supports_delta, assigned_id) RETURNING *) SELECT array_agg(f) INTO v_facet_defs FROM stored_definitions f; /* TODO: allow NULLs to be stored for PG v15+ */ -- Create facet storage EXECUTE format($sql$ CREATE TABLE %s ( facet_id int4 not null, chunk_id int4 not null, facet_value text collate "C" null, postinglist roaringbitmap not null, primary key (facet_id, facet_value, chunk_id) ); ALTER TABLE %s SET (toast_tuple_target = 8160);$sql$, faceting._qualified(schemaname, facet_tablename), faceting._qualified(schemaname, facet_tablename)); IF keep_deltas THEN -- Delta storage EXECUTE format($sql$ CREATE TABLE %s ( facet_id int4 not null, facet_value text collate "C" null, posting %s not null, delta int2, primary key (facet_id, facet_value, posting) ); $sql$, faceting._qualified(schemaname, delta_tablename), key_type); PERFORM faceting.create_delta_trigger(v_table_id); END IF; IF populate THEN PERFORM faceting.populate_facets(v_table_id, false); END IF; END; $$; CREATE FUNCTION faceting._get_values_clause(fdef facet_definition, extra_cols text, table_alias text) RETURNS text LANGUAGE plpgsql AS $$ DECLARE result text; BEGIN EXECUTE format('SELECT faceting.%s_facet_values($1, $2, $3)', fdef.facet_type) INTO result USING fdef, extra_cols, table_alias; RETURN result; END; $$; CREATE FUNCTION faceting._get_subquery_clause(fdef facet_definition, extra_cols text, table_alias text) RETURNS text LANGUAGE plpgsql AS $$ DECLARE result text; BEGIN EXECUTE format('SELECT faceting.%s_facet_subquery($1, $2, $3)', fdef.facet_type) INTO result USING fdef, extra_cols, table_alias; RETURN result; END; $$; CREATE FUNCTION faceting.add_facets(p_table regclass, facets facet_definition[], populate bool = true) RETURNS SETOF int4 LANGUAGE plpgsql AS $$ DECLARE v_table_id oid; tdef faceting.faceted_table; highest_facet_id int4; v_facet_names text[]; v_facet_ids int4[]; BEGIN v_table_id := p_table::oid; SELECT t.* INTO tdef FROM faceting.faceted_table t WHERE t.table_id = v_table_id; IF NOT FOUND THEN RAISE EXCEPTION 'Table % is not faceted', p_table; END IF; SELECT MAX(facet_id) INTO highest_facet_id FROM faceting.facet_definition WHERE table_id = v_table_id; WITH stored_definitions AS ( INSERT INTO faceting.facet_definition (table_id, facet_id, facet_name, facet_type, base_column, params, is_multi, supports_delta) SELECT v_table_id, highest_facet_id + assigned_id, facet_name, facet_type, base_column, params, is_multi, supports_delta FROM UNNEST(facets) WITH ORDINALITY AS x(_, _, facet_name, facet_type, base_column, params, is_multi, supports_delta, assigned_id) RETURNING *) SELECT array_agg(f.facet_name), array_agg(f.facet_id) INTO v_facet_names, v_facet_ids FROM stored_definitions f; IF tdef.delta_table IS NOT NULL THEN PERFORM faceting.create_delta_trigger(v_table_id); END IF; IF populate THEN PERFORM faceting.populate_facets(v_table_id, false, facets := v_facet_names); END IF; RETURN QUERY SELECT unnest(v_facet_ids); END; $$; CREATE FUNCTION faceting.drop_facets(p_table regclass, facets text[]) RETURNS SETOF text LANGUAGE plpgsql AS $$ DECLARE v_table_id oid; tdef faceting.faceted_table; v_dropped_names text[]; v_dropped_ids int4[]; BEGIN v_table_id := p_table::oid; SELECT t.* INTO tdef FROM faceting.faceted_table t WHERE t.table_id = v_table_id; IF NOT FOUND THEN RAISE EXCEPTION 'Table % is not faceted', p_table; END IF; WITH dropped_facets AS ( DELETE FROM faceting.facet_definition WHERE table_id = v_table_id AND facet_name = ANY (facets) RETURNING facet_id, facet_name) SELECT array_agg(facet_id), array_agg(facet_name) INTO v_dropped_ids, v_dropped_names FROM dropped_facets; EXECUTE format('DELETE FROM %s WHERE facet_id = ANY ($1)', faceting._qualified(tdef.schemaname, tdef.facets_table)) USING v_dropped_ids; IF tdef.delta_table IS NOT NULL THEN -- Important to replace trigger first so deletion runs with a new snapshot PERFORM faceting.create_delta_trigger(v_table_id); EXECUTE format('DELETE FROM %s WHERE facet_id = ANY ($1)', faceting._qualified(tdef.schemaname, tdef.delta_table)) USING v_dropped_ids; END IF; RETURN QUERY SELECT unnest(v_dropped_names); END; $$; CREATE FUNCTION faceting.drop_faceting(p_table regclass) RETURNS bool LANGUAGE plpgsql AS $$ DECLARE v_table_id oid; tdef faceting.faceted_table; tfunc_name text; trg_name text; BEGIN v_table_id := p_table::oid; SELECT t.* INTO tdef FROM faceting.faceted_table t WHERE t.table_id = v_table_id; IF NOT FOUND THEN RAISE NOTICE 'Table % is not faceted', p_table; RETURN true; END IF; EXECUTE format('LOCK TABLE %s', faceting._qualified(tdef.schemaname, tdef.tablename)); DELETE FROM faceting.facet_definition WHERE table_id = v_table_id; DELETE FROM faceting.faceted_table WHERE table_id = v_table_id; EXECUTE format('DROP TABLE %s', faceting._qualified(tdef.schemaname, tdef.facets_table)); IF tdef.delta_table IS NOT NULL THEN SELECT tn.tfunc_name, tn.trg_name INTO tfunc_name, trg_name FROM faceting._trigger_names(tdef.tablename) tn; EXECUTE format('DROP TRIGGER %s ON %s', trg_name, faceting._qualified(tdef.schemaname, tdef.tablename)); EXECUTE format('DROP FUNCTION %s', faceting._qualified(tdef.schemaname, tfunc_name)); EXECUTE format('DROP TABLE %s', faceting._qualified(tdef.schemaname, tdef.delta_table)); END IF; RETURN true; END $$; CREATE FUNCTION faceting.populate_facets_query(p_table_id oid, facets text[] = null) RETURNS text LANGUAGE plpgsql AS $$ DECLARE sql text; values_entries text[]; subquery_entries text[]; clauses text[]; v_chunk_bits int; v_keycol name; tdef faceting.faceted_table; BEGIN SELECT t.* INTO tdef FROM faceting.faceted_table t WHERE t.table_id = p_table_id; SELECT chunk_bits, key INTO v_chunk_bits, v_keycol FROM faceting.faceted_table WHERE table_id = p_table_id; SELECT array_agg(faceting._get_values_clause(fd, '', 'd.') ORDER BY facet_id) INTO values_entries FROM faceting.facet_definition fd WHERE (facets IS NULL OR fd.facet_name = ANY (facets)) AND table_id = p_table_id AND NOT fd.is_multi; SELECT array_agg(faceting._get_subquery_clause(fd, '', 'd.')) INTO subquery_entries FROM faceting.facet_definition fd WHERE (facets IS NULL OR fd.facet_name = ANY (facets)) AND table_id = p_table_id AND fd.is_multi; IF array_length(values_entries, 1) > 0 THEN clauses := array[format('VALUES %s', array_to_string(values_entries, E',\n '))]; ELSE clauses := array[]; END IF; clauses := clauses || subquery_entries; sql := format($sql$ SELECT facet_id, (%s >> %s)::int4 chunk_id, facet_value collate "POSIX", rb_build_agg((%s & ((1 << %s) - 1))::int4 ORDER BY %s) FROM %s d, LATERAL ( %s ) t(facet_id, facet_value) WHERE facet_value IS NOT NULL GROUP BY facet_id, facet_value collate "POSIX", chunk_id $sql$, v_keycol, v_chunk_bits, v_keycol, v_chunk_bits, v_keycol, p_table_id::regclass::text, array_to_string(clauses, E'\n UNION ALL\n ') ); RETURN sql; END; $$; CREATE FUNCTION _trigger_names(tablename text, OUT tfunc_name text, OUT trg_name text) LANGUAGE plpgsql AS $$ BEGIN tfunc_name := faceting._identifier_append(tablename, '_facets_trigger'); trg_name := faceting._identifier_append(tablename, '_facets_update'); RETURN; END; $$; CREATE FUNCTION create_delta_trigger(p_table_id oid, p_create bool = true) RETURNS text LANGUAGE plpgsql AS $$ DECLARE tfunc_name text; trg_name text; sql text; tdef faceting.faceted_table; insert_values text[]; insert_subqueries text[]; insert_clauses text[]; delete_values text[]; delete_subqueries text[]; delete_clauses text[]; base_columns text[]; BEGIN SELECT t.* INTO tdef FROM faceting.faceted_table t WHERE t.table_id = p_table_id; SELECT tn.tfunc_name, tn.trg_name INTO tfunc_name, trg_name FROM faceting._trigger_names(tdef.tablename) tn; SELECT array_agg(faceting._get_values_clause(fd, format(', NEW.%I, 1', tdef.key) , 'NEW.') ORDER BY facet_id), array_agg(faceting._get_values_clause(fd, format(', OLD.%I, -1', tdef.key) , 'OLD.') ORDER BY facet_id), array_agg(fd.base_column) INTO insert_values, delete_values, base_columns FROM faceting.facet_definition fd WHERE table_id = p_table_id AND NOT fd.is_multi AND fd.supports_delta; SELECT array_agg(faceting._get_subquery_clause(fd, format(', NEW.%I, 1', tdef.key) , 'NEW.') ORDER BY facet_id), array_agg(faceting._get_subquery_clause(fd, format(', OLD.%I, -1', tdef.key) , 'OLD.') ORDER BY facet_id), array_agg(fd.base_column) || base_columns INTO insert_subqueries, delete_subqueries, base_columns FROM faceting.facet_definition fd WHERE table_id = p_table_id AND fd.is_multi AND fd.supports_delta; insert_clauses := CASE WHEN array_length(insert_values, 1) > 0 THEN array['VALUES ' || array_to_string(insert_values, E',\n ')] ELSE '{}'::text[] END || insert_subqueries; delete_clauses := CASE WHEN array_length(delete_values, 1) > 0 THEN array['VALUES ' || array_to_string(delete_values, E',\n ')] ELSE '{}'::text[] END || delete_subqueries; sql := format($sql$ CREATE OR REPLACE FUNCTION %s() RETURNS trigger AS $func$ BEGIN IF TG_OP = 'UPDATE' AND OLD.%I != NEW.%I THEN RAISE EXCEPTION 'Update of key column of faceted tables is not supported'; END IF; IF TG_OP = 'INSERT' OR TG_OP = 'UPDATE' THEN INSERT INTO %s (facet_id, facet_value, posting, delta) SELECT * FROM ( %s ) AS deltas(facet_id, facet_value, posting, delta) WHERE facet_value IS NOT NULL ON CONFLICT (facet_id, facet_value, posting) DO UPDATE SET delta = EXCLUDED.delta + %s.delta; END IF; IF TG_OP = 'DELETE' OR TG_OP = 'UPDATE' THEN INSERT INTO %s (facet_id, facet_value, posting, delta) SELECT * FROM ( %s ) AS deltas(facet_id, facet_value, posting, delta) WHERE facet_value IS NOT NULL ON CONFLICT (facet_id, facet_value, posting) DO UPDATE SET delta = EXCLUDED.delta + %s.delta; END IF; RETURN NULL; END; $func$ LANGUAGE plpgsql; CREATE OR REPLACE TRIGGER %s AFTER INSERT OR DELETE OR UPDATE OF %s ON %s FOR EACH ROW EXECUTE FUNCTION %s(); $sql$, -- Trigger name faceting._qualified(tdef.schemaname, tfunc_name), -- Key update check tdef.key, tdef.key, -- Positive deltas faceting._qualified(tdef.schemaname, tdef.delta_table), array_to_string(insert_clauses, E'\n UNION ALL\n '), faceting._qualified(tdef.schemaname, tdef.delta_table), -- Negative deltas faceting._qualified(tdef.schemaname, tdef.delta_table), array_to_string(delete_clauses, E'\n UNION ALL\n '), faceting._qualified(tdef.schemaname, tdef.delta_table), -- Trigger definition trg_name, array_to_string(base_columns, ', '), faceting._qualified(tdef.schemaname, tdef.tablename), faceting._qualified(tdef.schemaname, tfunc_name) ); IF p_create THEN EXECUTE sql; END IF; RETURN sql; END; $$; CREATE FUNCTION faceting.populate_facets(p_table_id oid, p_use_copy bool = false, debug bool = false, facets text[] = null) RETURNS void LANGUAGE plpgsql AS $$ DECLARE tdef faceting.faceted_table; query text; sql text; BEGIN SELECT t.* INTO tdef FROM faceting.faceted_table t WHERE t.table_id = p_table_id; IF tdef.table_id IS NULL THEN RAISE EXCEPTION 'Table % not found', p_table_id; END IF; query := faceting.populate_facets_query(p_table_id, facets => facets); IF p_use_copy THEN EXECUTE format($copy$COPY %s FROM PROGRAM $prog$ psql -h localhost %s -c "COPY (%s) TO STDOUT" $prog$ $copy$, faceting._qualified(tdef.schemaname, tdef.facets_table), current_database(), replace(query, '"', '\"')); RETURN; END IF; sql := format('INSERT INTO %s %s', faceting._qualified(tdef.schemaname, tdef.facets_table), query); IF debug THEN RAISE NOTICE '%s', sql; END IF; EXECUTE sql; END; $$; CREATE FUNCTION faceting.datetrunc_facet(col name, "precision" text, p_facet_name text = null) RETURNS facet_definition LANGUAGE SQL AS $$ SELECT null::int, null::int, coalesce(p_facet_name, col), 'datetrunc', col, jsonb_build_object('precision', "precision"), false, true; $$; CREATE FUNCTION faceting.datetrunc_facet_values(fdef facet_definition, extra_cols text, table_alias text) RETURNS text LANGUAGE plpgsql AS $$ BEGIN RETURN format('(%s, date_trunc(%L, %s%I)::text%s)', fdef.facet_id, fdef.params->>'precision', table_alias, fdef.base_column, extra_cols); END; $$; CREATE FUNCTION faceting.plain_facet(col name, p_facet_name text = null) RETURNS facet_definition LANGUAGE SQL AS $$ SELECT null::int, null::int, coalesce(p_facet_name, col), 'plain', col, '{}'::jsonb, false, true; $$; CREATE FUNCTION faceting.plain_facet_values(fdef facet_definition, extra_cols text, table_alias text) RETURNS text LANGUAGE plpgsql AS $$ BEGIN RETURN format('(%s, %s%I::text%s)', fdef.facet_id, table_alias, fdef.base_column, extra_cols); END; $$; CREATE FUNCTION faceting.bucket_facet(col name, buckets anyarray, p_facet_name text = null) RETURNS facet_definition LANGUAGE SQL AS $$ SELECT null::int, null::int, coalesce(p_facet_name, col), 'bucket', col, jsonb_build_object('buckets', buckets::text), false, true; $$; CREATE FUNCTION faceting.bucket_facet_values(fdef facet_definition, extra_cols text, table_alias text) RETURNS text LANGUAGE plpgsql AS $$ BEGIN RETURN format('(%s, width_bucket(%s%I, %L)::text%s)', fdef.facet_id, table_alias, fdef.base_column, fdef.params->>'buckets', extra_cols); END; $$; CREATE FUNCTION faceting.array_facet(col name, p_facet_name text = null) RETURNS facet_definition LANGUAGE SQL AS $$ SELECT null::int, null::int, coalesce(p_facet_name, col), 'array', col, '{}'::jsonb, true, true; $$; CREATE FUNCTION faceting.array_facet_subquery(fdef facet_definition, extra_cols text, table_alias text) RETURNS text LANGUAGE plpgsql AS $$ BEGIN RETURN format('(SELECT %s, element_value::text%s FROM unnest(%s%I) element_value)', fdef.facet_id, extra_cols, table_alias, fdef.base_column); END; $$; CREATE FUNCTION faceting.joined_plain_facet(col text, from_clause text, correlation text, p_facet_name text = null) RETURNS facet_definition LANGUAGE plpgsql AS $$ DECLARE base_col_name text; BEGIN SELECT ident[array_upper(ident, 1)] INTO base_col_name FROM parse_ident(col) ident; RETURN row(null::oid, null::int, coalesce(p_facet_name, base_col_name), 'joined_plain'::text, NULL::name, jsonb_build_object('col', col, 'from_clause', from_clause, 'correlation', correlation), true, false); END; $$; CREATE FUNCTION faceting.joined_plain_facet_subquery(fdef facet_definition, extra_cols text, table_alias text) RETURNS text LANGUAGE plpgsql AS $$ DECLARE correlation_clause text; BEGIN correlation_clause := replace(fdef.params->>'correlation', '{TABLE}.', table_alias); RETURN format('(SELECT %s, %s::text%s FROM %s WHERE %s)', fdef.facet_id, fdef.params->>'col', extra_cols, fdef.params->>'from_clause', correlation_clause); END; $$; CREATE TYPE faceting.facet_counts AS ( facet_name text, facet_value text, cardinality int8 ); CREATE FUNCTION faceting.top_values(p_table_id oid, n int = 5, facets text[] = null) RETURNS SETOF faceting.facet_counts LANGUAGE plpgsql AS $$ DECLARE tdef faceting.faceted_table; facet_filter text = ''; BEGIN SELECT t.* INTO tdef FROM faceting.faceted_table t WHERE t.table_id = p_table_id; IF tdef.table_id IS NULL THEN RAISE EXCEPTION 'Table % not found', p_table_id; END IF; IF facets IS NOT NULL THEN SELECT format('WHERE facet_id = ANY (''%s'')', array_agg(facet_id)::text) INTO facet_filter FROM faceting.facet_definition fd WHERE fd.facet_name = ANY (facets); END IF; RETURN QUERY EXECUTE format($sql$ SELECT facet_name, facet_value, sum::int8 FROM ( SELECT facet_id, facet_value, sum, rank() OVER (PARTITION BY facet_id ORDER BY sum DESC) rank FROM ( SELECT facet_id, facet_value, sum(rb_cardinality(postinglist)) FROM %s %s GROUP BY 1, 2 ) x ) counts JOIN faceting.facet_definition fd USING (facet_id) WHERE rank <= 5 AND table_id = $1 ORDER BY facet_id, rank, facet_value; $sql$, faceting._qualified(tdef.schemaname, tdef.facets_table), facet_filter) USING p_table_id; END; $$; CREATE TYPE faceting.facet_filter AS ( facet_name text, facet_value text ); CREATE FUNCTION faceting.count_results(p_table_id oid, filters facet_filter[]) RETURNS SETOF faceting.facet_counts LANGUAGE plpgsql AS $$ DECLARE tdef faceting.faceted_table; select_facets int[]; sql text; BEGIN SELECT t.* INTO tdef FROM faceting.faceted_table t WHERE t.table_id = p_table_id; IF tdef.table_id IS NULL THEN RAISE EXCEPTION 'Table % not found', p_table_id; END IF; SELECT array_agg(facet_id) INTO select_facets FROM faceting.facet_definition WHERE table_id = p_table_id AND facet_name NOT IN (SELECT f.facet_name FROM unnest(filters) f); sql := format($sql$ WITH filters AS ( SELECT facet_id, facet_name, facet_value FROM faceting.facet_definition JOIN unnest($1) t USING (facet_name) WHERE table_id = $2 ), lookup AS ( SELECT chunk_id, rb_and_agg(postinglist) postinglist FROM %s d JOIN filters USING (facet_id, facet_value) GROUP BY chunk_id ), results AS ( SELECT facet_id, facet_value, sum(rb_and_cardinality(lookup.postinglist, d.postinglist))::int8 cardinality FROM lookup JOIN %s d USING (chunk_id) WHERE facet_id = ANY ($3) GROUP BY facet_id, facet_value ) SELECT facet_name, facet_value, cardinality FROM results JOIN faceting.facet_definition fd USING (facet_id) WHERE fd.table_id = $2 ORDER BY facet_id, cardinality DESC, facet_value $sql$, faceting._qualified(tdef.schemaname, tdef.facets_table), faceting._qualified(tdef.schemaname, tdef.facets_table)); RETURN QUERY EXECUTE sql USING filters, p_table_id, select_facets; END; $$; CREATE FUNCTION faceting.merge_deltas(p_table_id oid) RETURNS void LANGUAGE plpgsql AS $$ DECLARE sql text; tdef faceting.faceted_table; BEGIN SELECT t.* INTO tdef FROM faceting.faceted_table t WHERE t.table_id = p_table_id; IF tdef.table_id IS NULL THEN RAISE EXCEPTION 'Table % not found', p_table_id; END IF; sql := format($sql$ WITH to_be_aggregated AS (DELETE FROM %s RETURNING *), chunk_deltas AS ( SELECT facet_id, (posting >> %s) chunk_id, facet_value, coalesce(rb_build_agg((posting & ((1<<%s) - 1))::int4) FILTER (WHERE delta > 0), '\x3a30000000000000') AS postings_added, coalesce(rb_build_agg((posting & ((1<<%s) - 1))::int4) FILTER (WHERE delta < 0), '\x3a30000000000000') AS postings_deleted FROM to_be_aggregated GROUP BY 1,2,3 ), updates AS (UPDATE %s AS d SET postinglist = rb_or(rb_andnot(postinglist, postings_deleted), postings_added) FROM chunk_deltas WHERE d.facet_id = chunk_deltas.facet_id AND d.facet_value = chunk_deltas.facet_value AND d.chunk_id = chunk_deltas.chunk_id) INSERT INTO %s SELECT facet_id, chunk_id, facet_value, postings_added FROM chunk_deltas ON CONFLICT (facet_id, facet_value, chunk_id) DO NOTHING; $sql$, faceting._qualified(tdef.schemaname, tdef.delta_table), tdef.chunk_bits, tdef.chunk_bits, tdef.chunk_bits, faceting._qualified(tdef.schemaname, tdef.facets_table), faceting._qualified(tdef.schemaname, tdef.facets_table) ); EXECUTE sql; RETURN; END; $$; CREATE PROCEDURE faceting.run_maintenance(debug bool = false) LANGUAGE plpgsql AS $$ DECLARE tdef faceting.faceted_table; start_ts timestamptz; end_ts timestamptz; BEGIN FOR tdef IN SELECT * FROM faceting.faceted_table LOOP IF debug THEN RAISE NOTICE 'Starting facets maintenance of %', tdef.tablename; END IF; start_ts := clock_timestamp(); PERFORM faceting.merge_deltas(tdef.table_id); COMMIT; end_ts := clock_timestamp(); IF debug THEN RAISE NOTICE 'End facets maintenance of %, duration: %s', tdef.tablename, end_ts - start_ts; END IF; END LOOP; END; $$; pgfaceting-0.2.0/test/000077500000000000000000000000001457633707700146225ustar00rootroot00000000000000pgfaceting-0.2.0/test/expected/000077500000000000000000000000001457633707700164235ustar00rootroot00000000000000pgfaceting-0.2.0/test/expected/base.out000066400000000000000000000450011457633707700200660ustar00rootroot00000000000000CREATE EXTENSION roaringbitmap; CREATE EXTENSION pgfaceting; CREATE SCHEMA facetingtestsuite; CREATE TYPE facetingtestsuite.mimetype AS ENUM ( 'application/pdf', 'text/html', 'image/jpeg', 'image/png', 'application/msword', 'text/csv', 'application/zip', 'application/vnd.ms-powerpoint' ); CREATE TABLE facetingtestsuite.employee ( id int8 primary key, full_name text, department text ); CREATE TABLE facetingtestsuite.categories ( id int8 primary key, owner_id int8 REFERENCES facetingtestsuite.employee (id) ); CREATE TABLE facetingtestsuite.documents ( id int8 primary key, created timestamptz not null, finished timestamptz, category_id int8 REFERENCES facetingtestsuite.categories (id), tags text[], type facetingtestsuite.mimetype, size int8, title text ); CREATE TABLE facetingtestsuite.authors ( document_id int8 REFERENCES facetingtestsuite.documents (id) ON DELETE CASCADE, author_id int8 REFERENCES facetingtestsuite.employee (id), PRIMARY KEY (document_id, author_id) ); COPY facetingtestsuite.employee (id, full_name, department) FROM stdin; COPY facetingtestsuite.categories (id, owner_id) FROM stdin; COPY facetingtestsuite.documents (id, created, finished, category_id, tags, type, size, title) FROM stdin; COPY facetingtestsuite.authors FROM stdin; SELECT faceting.add_faceting_to_table('facetingtestsuite.documents', key => 'id', facets => array[ faceting.datetrunc_facet('created', 'month'), faceting.datetrunc_facet('finished', 'month'), faceting.plain_facet('category_id'), faceting.array_facet('tags'), faceting.bucket_facet('size', buckets => array[0,1000,5000,10000,50000,100000,500000]), faceting.joined_plain_facet('author_id', from_clause => 'facetingtestsuite.authors a', correlation => 'a.document_id = {TABLE}.id', p_facet_name => 'author') ], populate => false ); add_faceting_to_table ----------------------- (1 row) SELECT faceting.populate_facets('facetingtestsuite.documents'::regclass); populate_facets ----------------- (1 row) SELECT * FROM faceting.top_values('facetingtestsuite.documents'::regclass); facet_name | facet_value | cardinality -------------+------------------------------+------------- created | Tue Dec 01 00:00:00 2009 PST | 10 finished | Fri Jan 01 00:00:00 2010 PST | 6 finished | Tue Dec 01 00:00:00 2009 PST | 4 category_id | 24 | 4 category_id | 8 | 2 category_id | 9 | 2 category_id | 12 | 1 tags | blue | 7 tags | orange | 5 tags | green | 4 tags | burlywood | 2 tags | olive | 2 size | 6 | 7 size | 7 | 2 size | 5 | 1 author | 1 | 7 author | 2 | 4 author | 3 | 2 (18 rows) SELECT faceting.add_facets('facetingtestsuite.documents', facets=>array[ faceting.plain_facet('type'), faceting.joined_plain_facet('e.department', from_clause => 'facetingtestsuite.categories c JOIN facetingtestsuite.employee e ON c.owner_id = e.id', correlation => 'c.id = {TABLE}.category_id') ]); add_facets ------------ 7 8 (2 rows) SELECT faceting.populate_facets_query('facetingtestsuite.documents'::regclass::oid); populate_facets_query -------------------------------------------------------------------------------------------------------------------------------------------------------------- + SELECT facet_id, (id >> 20)::int4 chunk_id, facet_value collate "POSIX", rb_build_agg((id & ((1 << 20) - 1))::int4 ORDER BY id) + FROM facetingtestsuite.documents d, + LATERAL ( + VALUES (1, date_trunc('month', d.created)::text), + (2, date_trunc('month', d.finished)::text), + (3, d.category_id::text), + (5, width_bucket(d.size, '{0,1000,5000,10000,50000,100000,500000}')::text), + (7, d.type::text) + UNION ALL + (SELECT 4, element_value::text FROM unnest(d.tags) element_value) + UNION ALL + (SELECT 6, author_id::text FROM facetingtestsuite.authors a WHERE a.document_id = d.id) + UNION ALL + (SELECT 8, e.department::text FROM facetingtestsuite.categories c JOIN facetingtestsuite.employee e ON c.owner_id = e.id WHERE c.id = d.category_id)+ ) t(facet_id, facet_value) + WHERE facet_value IS NOT NULL + GROUP BY facet_id, facet_value collate "POSIX", chunk_id + (1 row) SELECT * FROM faceting.top_values('facetingtestsuite.documents'::regclass); facet_name | facet_value | cardinality -------------+------------------------------+------------- created | Tue Dec 01 00:00:00 2009 PST | 10 finished | Fri Jan 01 00:00:00 2010 PST | 6 finished | Tue Dec 01 00:00:00 2009 PST | 4 category_id | 24 | 4 category_id | 8 | 2 category_id | 9 | 2 category_id | 12 | 1 tags | blue | 7 tags | orange | 5 tags | green | 4 tags | burlywood | 2 tags | olive | 2 size | 6 | 7 size | 7 | 2 size | 5 | 1 author | 1 | 7 author | 2 | 4 author | 3 | 2 type | application/pdf | 5 type | text/html | 3 type | image/jpeg | 2 department | Sales | 7 department | Director | 2 (23 rows) COPY facetingtestsuite.documents (id, created, finished, category_id, tags, type, size, title) FROM stdin; SELECT faceting.merge_deltas('facetingtestsuite.documents'::regclass); merge_deltas -------------- (1 row) SELECT * FROM faceting.top_values('facetingtestsuite.documents'::regclass); facet_name | facet_value | cardinality -------------+------------------------------+------------- created | Tue Dec 01 00:00:00 2009 PST | 20 finished | Fri Jan 01 00:00:00 2010 PST | 15 finished | Tue Dec 01 00:00:00 2009 PST | 5 category_id | 24 | 12 category_id | 9 | 4 category_id | 8 | 2 category_id | 12 | 1 tags | blue | 15 tags | orange | 14 tags | green | 6 tags | brown | 4 tags | red | 3 size | 6 | 14 size | 7 | 4 size | 4 | 1 size | 5 | 1 author | 1 | 7 author | 2 | 4 author | 3 | 2 type | application/pdf | 10 type | image/jpeg | 3 type | image/png | 3 type | text/html | 3 type | text/csv | 1 department | Sales | 7 department | Director | 2 (26 rows) (SELECT 'created' AS facet_name, date_trunc('month', created)::text AS facet_value, COUNT(*) AS cardinality FROM facetingtestsuite.documents GROUP BY 1, 2 ORDER BY 3 DESC, 2 LIMIT 5) UNION ALL (SELECT 'finished', date_trunc('month', finished)::text, COUNT(*) FROM facetingtestsuite.documents GROUP BY 1, 2 ORDER BY 3 DESC, 2 LIMIT 5) UNION ALL (SELECT 'category_id', category_id::text, COUNT(*) FROM facetingtestsuite.documents GROUP BY 1, 2 ORDER BY 3 DESC, 2 LIMIT 5) UNION ALL (SELECT 'type', type::text, COUNT(*) FROM facetingtestsuite.documents GROUP BY 1, 2 ORDER BY 3 DESC, 2 LIMIT 5) UNION ALL (SELECT 'size', width_bucket(size, array[0,1000,5000,10000,50000,100000,500000])::text, COUNT(*) FROM facetingtestsuite.documents GROUP BY 1, 2 ORDER BY 3 DESC, 2 LIMIT 5); facet_name | facet_value | cardinality -------------+------------------------------+------------- created | Tue Dec 01 00:00:00 2009 PST | 20 finished | Fri Jan 01 00:00:00 2010 PST | 15 finished | Tue Dec 01 00:00:00 2009 PST | 5 category_id | 24 | 12 category_id | 9 | 4 category_id | 8 | 2 category_id | 12 | 1 category_id | | 1 type | application/pdf | 10 type | image/jpeg | 3 type | image/png | 3 type | text/html | 3 type | text/csv | 1 size | 6 | 14 size | 7 | 4 size | 4 | 1 size | 5 | 1 (17 rows) SELECT * FROM faceting.count_results('facetingtestsuite.documents'::regclass, filters => array[row('category_id', 24)]::faceting.facet_filter[]); facet_name | facet_value | cardinality ------------+------------------------------+------------- created | Tue Dec 01 00:00:00 2009 PST | 12 finished | Fri Jan 01 00:00:00 2010 PST | 10 finished | Tue Dec 01 00:00:00 2009 PST | 2 tags | orange | 11 tags | blue | 9 tags | green | 5 tags | brown | 4 tags | red | 3 tags | darkslateblue | 2 tags | aqua | 1 tags | burlywood | 1 tags | cadetblue | 1 tags | candy pink | 1 tags | chartreuse | 1 tags | cherry | 1 tags | chocolate | 1 tags | coral | 1 tags | cyan | 1 tags | dimgray | 1 tags | dirt brown | 1 tags | floralwhite | 1 tags | ivory | 1 tags | lavender | 1 tags | lightpink | 1 tags | maroon | 1 tags | olive | 1 tags | pale gold | 1 tags | pale peach | 1 tags | peachy pink | 1 tags | purple | 1 tags | antiquewhite | 0 tags | aqua blue | 0 tags | aquamarine | 0 tags | bisque | 0 tags | lightcoral | 0 tags | mustard brown | 0 tags | pink | 0 tags | red purple | 0 tags | rust | 0 tags | very light pink | 0 size | 6 | 9 size | 7 | 2 size | 4 | 1 size | 5 | 0 author | 1 | 3 author | 2 | 2 author | 3 | 1 type | application/pdf | 5 type | image/jpeg | 3 type | text/html | 2 type | image/png | 1 type | text/csv | 1 department | Sales | 4 department | Director | 0 (54 rows) DELETE FROM facetingtestsuite.documents WHERE 'red' = ANY (tags); SELECT faceting.merge_deltas('facetingtestsuite.documents'::regclass); merge_deltas -------------- (1 row) SELECT * FROM faceting.top_values('facetingtestsuite.documents'::regclass, facets=>array['tags', 'type']); facet_name | facet_value | cardinality ------------+-----------------+------------- tags | blue | 13 tags | orange | 11 tags | green | 5 tags | brown | 4 tags | aqua | 2 tags | burlywood | 2 tags | olive | 2 type | application/pdf | 9 type | image/png | 3 type | image/jpeg | 2 type | text/html | 2 type | text/csv | 1 (12 rows) SELECT faceting.drop_facets('facetingtestsuite.documents', array['type', 'tags', 'not existing']); drop_facets ------------- tags type (2 rows) SELECT * FROM faceting.top_values('facetingtestsuite.documents'::regclass); facet_name | facet_value | cardinality -------------+------------------------------+------------- created | Tue Dec 01 00:00:00 2009 PST | 17 finished | Fri Jan 01 00:00:00 2010 PST | 13 finished | Tue Dec 01 00:00:00 2009 PST | 4 category_id | 24 | 9 category_id | 9 | 4 category_id | 8 | 2 category_id | 12 | 1 size | 6 | 11 size | 7 | 4 size | 4 | 1 size | 5 | 1 author | 1 | 7 author | 2 | 4 author | 3 | 2 department | Sales | 7 department | Director | 2 (16 rows) SELECT faceting.drop_faceting('facetingtestsuite.documents'); drop_faceting --------------- t (1 row) -- Check that adding faceting back in works SELECT faceting.add_faceting_to_table('facetingtestsuite.documents', key => 'id', facets => array[faceting.plain_facet('category_id')]); add_faceting_to_table ----------------------- (1 row) SELECT * FROM faceting.top_values('facetingtestsuite.documents'::regclass); facet_name | facet_value | cardinality -------------+-------------+------------- category_id | 24 | 9 category_id | 9 | 4 category_id | 8 | 2 category_id | 12 | 1 (4 rows) pgfaceting-0.2.0/test/expected/error_checks.out000066400000000000000000000046421457633707700216330ustar00rootroot00000000000000CREATE EXTENSION IF NOT EXISTS roaringbitmap; NOTICE: extension "roaringbitmap" already exists, skipping CREATE EXTENSION IF NOT EXISTS pgfaceting; NOTICE: extension "pgfaceting" already exists, skipping CREATE SCHEMA errorchecks; SET SEARCH_PATH = 'errorchecks', 'public'; CREATE TABLE uuid_based_docs ( id uuid primary key, type text ); CREATE TABLE text_based_docs ( id text primary key, type text ); CREATE TABLE int4_based_docs ( id int4 primary key, type text ); -- Expected error on key col not existing SELECT faceting.add_faceting_to_table('uuid_based_docs', key => 'nonexistent', facets => array[faceting.plain_facet('type')] ); ERROR: Key column nonexistent not found in errorcheckss.uuid_based_docss CONTEXT: PL/pgSQL function faceting.add_faceting_to_table(regclass,name,faceting.facet_definition[],integer,boolean,boolean) line 30 at RAISE -- Expected error on wrong type key column SELECT faceting.add_faceting_to_table('uuid_based_docs', key => 'id', facets => array[faceting.plain_facet('type')] ); ERROR: Key column type uuid is not supported. CONTEXT: PL/pgSQL function faceting.add_faceting_to_table(regclass,name,faceting.facet_definition[],integer,boolean,boolean) line 32 at RAISE SELECT faceting.add_faceting_to_table('text_based_docs', key => 'id', facets => array[faceting.plain_facet('type')] ); ERROR: Key column type text is not supported. CONTEXT: PL/pgSQL function faceting.add_faceting_to_table(regclass,name,faceting.facet_definition[],integer,boolean,boolean) line 32 at RAISE -- Wrong chunk bits SELECT faceting.add_faceting_to_table('int4_based_docs', key => 'id', chunk_bits => 42, facets => array[faceting.plain_facet('type')] ); ERROR: Invalid number of bits per chunk: 42 CONTEXT: PL/pgSQL function faceting.add_faceting_to_table(regclass,name,faceting.facet_definition[],integer,boolean,boolean) line 19 at RAISE -- Not faceted table SELECT faceting.add_facets('int4_based_docs', facets => array[faceting.plain_facet('type')]); ERROR: Table int4_based_docs is not faceted CONTEXT: PL/pgSQL function faceting.add_facets(regclass,faceting.facet_definition[],boolean) line 12 at RAISE -- Expected no error SELECT faceting.add_faceting_to_table('int4_based_docs', key => 'id', facets => array[faceting.plain_facet('type')] ); add_faceting_to_table ----------------------- (1 row) pgfaceting-0.2.0/test/sql/000077500000000000000000000000001457633707700154215ustar00rootroot00000000000000pgfaceting-0.2.0/test/sql/base.sql000066400000000000000000000200701457633707700170530ustar00rootroot00000000000000CREATE EXTENSION roaringbitmap; CREATE EXTENSION pgfaceting; CREATE SCHEMA facetingtestsuite; CREATE TYPE facetingtestsuite.mimetype AS ENUM ( 'application/pdf', 'text/html', 'image/jpeg', 'image/png', 'application/msword', 'text/csv', 'application/zip', 'application/vnd.ms-powerpoint' ); CREATE TABLE facetingtestsuite.employee ( id int8 primary key, full_name text, department text ); CREATE TABLE facetingtestsuite.categories ( id int8 primary key, owner_id int8 REFERENCES facetingtestsuite.employee (id) ); CREATE TABLE facetingtestsuite.documents ( id int8 primary key, created timestamptz not null, finished timestamptz, category_id int8 REFERENCES facetingtestsuite.categories (id), tags text[], type facetingtestsuite.mimetype, size int8, title text ); CREATE TABLE facetingtestsuite.authors ( document_id int8 REFERENCES facetingtestsuite.documents (id) ON DELETE CASCADE, author_id int8 REFERENCES facetingtestsuite.employee (id), PRIMARY KEY (document_id, author_id) ); COPY facetingtestsuite.employee (id, full_name, department) FROM stdin; 1 John Smith Director 2 Jane Doe Sales 3 Jill James Sales \. COPY facetingtestsuite.categories (id, owner_id) FROM stdin; 8 2 9 1 12 3 24 2 \. COPY facetingtestsuite.documents (id, created, finished, category_id, tags, type, size, title) FROM stdin; 1 2010-01-01 00:00:42+02 2010-01-01 09:45:29+02 8 {blue,burlywood,antiquewhite,olive} application/pdf 71205 Interracial marriage Science Research 2 2010-01-01 00:00:37+02 2010-01-01 03:55:08+02 12 {lightcoral,bisque,blue,"aqua blue","red purple",aqua} text/html 682069 Odour and trials helped to improve the country's history through the public 3 2010-01-01 00:00:33+02 2010-01-02 18:29:15+02 9 {"mustard brown","very light pink"} application/pdf 143708 Have technical scale, ordinary, commonsense notions of absolute time and length independent of the 4 2010-01-01 00:00:35+02 2010-01-02 01:12:08+02 24 {orange,green,blue} text/html 280663 Database of (/ˈdɛnmɑːrk/; Danish: Danmark [ˈd̥ænmɑɡ̊]) is a spiral 5 2010-01-01 00:01:06+02 2010-01-01 23:18:56+02 24 {orange,chocolate} image/jpeg 111770 Passage to now resumed 6 2010-01-01 00:01:05+02 2010-01-01 10:25:29+02 8 {blue,aquamarine} application/pdf 110809 East. Mesopotamia, BCE – 480 BCE), when determining a value that 7 2010-01-01 00:00:57+02 2010-01-02 00:41:01+02 \N {} application/pdf 230803 Bahía de It has also conquered 13 South American finds and another 8 2010-01-01 00:01:11+02 2010-01-01 14:22:11+02 24 {blue,burlywood,"dirt brown",orange,ivory,brown,green,olive,lightpink} image/jpeg 1304196 15-fold: from the mid- to late-20th 9 2010-01-01 00:01:47+02 2010-01-01 09:59:57+02 9 {green,blue,orange} application/pdf 142410 Popular Western localized function model. Psychiatric interventions such as local businesses, but also 10 2010-01-01 00:01:31+02 2010-01-01 05:49:47+02 24 {green,lavender,blue,orange,red,darkslateblue} text/html 199703 Rapidly expanding Large Interior Form, 1953-54, Man Enters the Cosmos and Nuclear Energy. \. COPY facetingtestsuite.authors FROM stdin; 1 1 1 2 2 1 3 1 4 1 4 2 4 3 5 1 5 2 6 2 7 3 9 1 10 1 \. SELECT faceting.add_faceting_to_table('facetingtestsuite.documents', key => 'id', facets => array[ faceting.datetrunc_facet('created', 'month'), faceting.datetrunc_facet('finished', 'month'), faceting.plain_facet('category_id'), faceting.array_facet('tags'), faceting.bucket_facet('size', buckets => array[0,1000,5000,10000,50000,100000,500000]), faceting.joined_plain_facet('author_id', from_clause => 'facetingtestsuite.authors a', correlation => 'a.document_id = {TABLE}.id', p_facet_name => 'author') ], populate => false ); SELECT faceting.populate_facets('facetingtestsuite.documents'::regclass); SELECT * FROM faceting.top_values('facetingtestsuite.documents'::regclass); SELECT faceting.add_facets('facetingtestsuite.documents', facets=>array[ faceting.plain_facet('type'), faceting.joined_plain_facet('e.department', from_clause => 'facetingtestsuite.categories c JOIN facetingtestsuite.employee e ON c.owner_id = e.id', correlation => 'c.id = {TABLE}.category_id') ]); SELECT faceting.populate_facets_query('facetingtestsuite.documents'::regclass::oid); SELECT * FROM faceting.top_values('facetingtestsuite.documents'::regclass); COPY facetingtestsuite.documents (id, created, finished, category_id, tags, type, size, title) FROM stdin; 11 2010-01-01 00:01:21+02 2010-01-01 20:31:12+02 9 {blue,pink,orange} image/png 679323 Additional 32 Martin, Saint Pierre and 12 2010-01-01 00:02:12+02 2010-01-02 10:33:25+02 24 {green,maroon,blue,coral,orange} application/pdf 166940 To harness between continents. By the mid-19th century? 13 2010-01-01 00:02:20+02 2010-01-01 03:59:11+02 24 {orange,"pale peach",blue,"peachy pink",chartreuse,aqua,brown} application/pdf 333191 The synchrocyclotron, been exposed 14 2010-01-01 00:02:32+02 2010-01-01 18:50:37+02 24 {orange,cherry,brown} application/pdf 12421 And supernovae as ways to indirectly measure these elusive phenomenological entities. 15 2010-01-01 00:02:47+02 2010-01-01 14:29:27+02 24 {orange,blue,cyan,red,floralwhite,darkslateblue} application/pdf 459132 Ratio. \n the nucleus of a cumulus or cumulonimbus. 16 2010-01-01 00:02:38+02 2010-01-01 20:53:15+02 24 {blue,orange,purple,"pale gold"} application/pdf 140909 Pacific. A observance of halakha may pose serious 17 2010-01-01 00:02:48+02 2010-01-02 08:19:47+02 9 {orange,blue,rust} image/png 414066 Gravity equivalent, it attract the wrath of 18 2010-01-01 00:03:05+02 2010-01-02 15:16:47+02 24 {dimgray,orange,red} image/jpeg 113942 Jim Crow classification methods including 19 2010-01-01 00:03:23+02 2010-01-02 06:33:01+02 24 {"candy pink",blue,orange,brown} text/csv 100419 Trans-Atlantic trade archdioceses, the Archdiocese of Atlanta. 20 2010-01-01 00:03:23+02 2010-01-02 02:24:17+02 24 {cadetblue,blue,green} image/png 705939 Normandy with others. Laughter is a kind of case that \. SELECT faceting.merge_deltas('facetingtestsuite.documents'::regclass); SELECT * FROM faceting.top_values('facetingtestsuite.documents'::regclass); (SELECT 'created' AS facet_name, date_trunc('month', created)::text AS facet_value, COUNT(*) AS cardinality FROM facetingtestsuite.documents GROUP BY 1, 2 ORDER BY 3 DESC, 2 LIMIT 5) UNION ALL (SELECT 'finished', date_trunc('month', finished)::text, COUNT(*) FROM facetingtestsuite.documents GROUP BY 1, 2 ORDER BY 3 DESC, 2 LIMIT 5) UNION ALL (SELECT 'category_id', category_id::text, COUNT(*) FROM facetingtestsuite.documents GROUP BY 1, 2 ORDER BY 3 DESC, 2 LIMIT 5) UNION ALL (SELECT 'type', type::text, COUNT(*) FROM facetingtestsuite.documents GROUP BY 1, 2 ORDER BY 3 DESC, 2 LIMIT 5) UNION ALL (SELECT 'size', width_bucket(size, array[0,1000,5000,10000,50000,100000,500000])::text, COUNT(*) FROM facetingtestsuite.documents GROUP BY 1, 2 ORDER BY 3 DESC, 2 LIMIT 5); SELECT * FROM faceting.count_results('facetingtestsuite.documents'::regclass, filters => array[row('category_id', 24)]::faceting.facet_filter[]); DELETE FROM facetingtestsuite.documents WHERE 'red' = ANY (tags); SELECT faceting.merge_deltas('facetingtestsuite.documents'::regclass); SELECT * FROM faceting.top_values('facetingtestsuite.documents'::regclass, facets=>array['tags', 'type']); SELECT faceting.drop_facets('facetingtestsuite.documents', array['type', 'tags', 'not existing']); SELECT * FROM faceting.top_values('facetingtestsuite.documents'::regclass); SELECT faceting.drop_faceting('facetingtestsuite.documents'); -- Check that adding faceting back in works SELECT faceting.add_faceting_to_table('facetingtestsuite.documents', key => 'id', facets => array[faceting.plain_facet('category_id')]); SELECT * FROM faceting.top_values('facetingtestsuite.documents'::regclass); pgfaceting-0.2.0/test/sql/error_checks.sql000066400000000000000000000025161457633707700206170ustar00rootroot00000000000000CREATE EXTENSION IF NOT EXISTS roaringbitmap; CREATE EXTENSION IF NOT EXISTS pgfaceting; CREATE SCHEMA errorchecks; SET SEARCH_PATH = 'errorchecks', 'public'; CREATE TABLE uuid_based_docs ( id uuid primary key, type text ); CREATE TABLE text_based_docs ( id text primary key, type text ); CREATE TABLE int4_based_docs ( id int4 primary key, type text ); -- Expected error on key col not existing SELECT faceting.add_faceting_to_table('uuid_based_docs', key => 'nonexistent', facets => array[faceting.plain_facet('type')] ); -- Expected error on wrong type key column SELECT faceting.add_faceting_to_table('uuid_based_docs', key => 'id', facets => array[faceting.plain_facet('type')] ); SELECT faceting.add_faceting_to_table('text_based_docs', key => 'id', facets => array[faceting.plain_facet('type')] ); -- Wrong chunk bits SELECT faceting.add_faceting_to_table('int4_based_docs', key => 'id', chunk_bits => 42, facets => array[faceting.plain_facet('type')] ); -- Not faceted table SELECT faceting.add_facets('int4_based_docs', facets => array[faceting.plain_facet('type')]); -- Expected no error SELECT faceting.add_faceting_to_table('int4_based_docs', key => 'id', facets => array[faceting.plain_facet('type')] );