From 299f7f23c94f7674b780b1c3965201d3034bb6c9 Mon Sep 17 00:00:00 2001 From: MasaoFujii Date: Thu, 27 Aug 2015 01:43:34 +0900 Subject: [PATCH] Add pg_gin_pending_cleanup function. pg_gin_pending_cleanup is the function that cleans up the pending list of the GIN index by moving tuples in it to the main GIN data structure in bulk. Even without this function, we can clean up the pending list by using VACUUM. However, since VACUUM needs to do not only the pending list cleanup but also other various jobs, it usually takes a long time and its performance impact is likely to be big. So pg_gin_pending_cleanup function is useful because we can clean up the list more quickly and avoid such big performance impact by using the function. Reviewed by Masahiko Sawada --- bigm_gin.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++ expected/pg_bigm.out | 34 ++++++++++++++++++++++++---- html/pg_bigm-1-2.html | 30 +++++++++++++++++++++++++ html/pg_bigm_en-1-2.html | 30 +++++++++++++++++++++++++ pg_bigm--1.2.sql | 5 +++++ sql/pg_bigm.sql | 27 +++++++++++++++++++--- 6 files changed, 177 insertions(+), 7 deletions(-) diff --git a/bigm_gin.c b/bigm_gin.c index f65855f..48043b9 100644 --- a/bigm_gin.c +++ b/bigm_gin.c @@ -19,12 +19,15 @@ #include "access/itup.h" #include "access/skey.h" #include "access/tuptoaster.h" +#include "access/xlog.h" #include "catalog/pg_type.h" #include "funcapi.h" #include "mb/pg_wchar.h" +#include "miscadmin.h" #include "storage/bufmgr.h" #include "storage/bufpage.h" #include "tsearch/ts_locale.h" +#include "utils/acl.h" #include "utils/array.h" #include "utils/builtins.h" #include "utils/rel.h" @@ -35,6 +38,7 @@ PG_FUNCTION_INFO_V1(gin_extract_query_bigm); PG_FUNCTION_INFO_V1(gin_bigm_consistent); PG_FUNCTION_INFO_V1(gin_bigm_compare_partial); PG_FUNCTION_INFO_V1(pg_gin_pending_stats); +PG_FUNCTION_INFO_V1(pg_gin_pending_cleanup); /* triConsistent function is available only in 9.4 or later */ #if PG_VERSION_NUM >= 90400 @@ -52,6 +56,7 @@ Datum gin_extract_query_bigm(PG_FUNCTION_ARGS); Datum gin_bigm_consistent(PG_FUNCTION_ARGS); Datum gin_bigm_compare_partial(PG_FUNCTION_ARGS); Datum pg_gin_pending_stats(PG_FUNCTION_ARGS); +Datum pg_gin_pending_cleanup(PG_FUNCTION_ARGS); #endif Datum @@ -425,3 +430,56 @@ pg_gin_pending_stats(PG_FUNCTION_ARGS) tuple = heap_form_tuple(tupdesc, values, isnull); PG_RETURN_DATUM(HeapTupleGetDatum(tuple)); } + +/* + * Move tuples from pending pages into regular GIN structure. + */ +Datum +pg_gin_pending_cleanup(PG_FUNCTION_ARGS) +{ + Oid relid = PG_GETARG_OID(0); + bool delay = PG_GETARG_BOOL(1); + Relation rel; + IndexBulkDeleteResult *stats; + GinState ginstate; + + rel = relation_open(relid, RowExclusiveLock); + + if (!pg_class_ownercheck(relid, GetUserId())) + aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_CLASS, + RelationGetRelationName(rel)); + + if (RecoveryInProgress()) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("recovery is in progress"), + errhint("GIN pending list cannot be cleaned up during recovery."))); + + if (rel->rd_rel->relkind != RELKIND_INDEX || + rel->rd_rel->relam != GIN_AM_OID) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("relation \"%s\" is not a GIN index", + RelationGetRelationName(rel)))); + + /* + * Reject attempts to read non-local temporary relations; we would be + * likely to get wrong data since we have no visibility into the owning + * session's local buffers. + */ + if (RELATION_IS_OTHER_TEMP(rel)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot access temporary indexes of other sessions"))); + + /* + * Set up all-zero stats and cleanup pending inserts. + */ + stats = (IndexBulkDeleteResult *) palloc0(sizeof(IndexBulkDeleteResult)); + initGinState(&ginstate, rel); + ginInsertCleanup(&ginstate, delay, stats); + + relation_close(rel, RowExclusiveLock); + + PG_RETURN_INT32(stats->pages_deleted); +} diff --git a/expected/pg_bigm.out b/expected/pg_bigm.out index 6ccc4ce..be99125 100644 --- a/expected/pg_bigm.out +++ b/expected/pg_bigm.out @@ -100,26 +100,52 @@ CREATE TABLE test_bigm (col1 text, col2 text); CREATE INDEX test_bigm_idx ON test_bigm USING gin (col1 gin_bigm_ops, col2 gin_bigm_ops); \copy test_bigm from 'data/bigm.csv' with csv --- tests pg_gin_pending_stats +-- tests pg_gin_pending_stats and pg_gin_pending_cleanup SELECT * FROM pg_gin_pending_stats('test_bigm_idx'); pages | tuples -------+-------- 43 | 249 (1 row) -VACUUM; +SELECT pg_gin_pending_cleanup('test_bigm_idx'); + pg_gin_pending_cleanup +------------------------ + 43 +(1 row) + SELECT * FROM pg_gin_pending_stats('test_bigm_idx'); pages | tuples -------+-------- 0 | 0 (1 row) -SELECT * FROM pg_gin_pending_stats('test_bigm'); -ERROR: relation "test_bigm" is not a GIN index CREATE INDEX test_bigm_btree ON test_bigm USING btree (col2); SELECT * FROM pg_gin_pending_stats('test_bigm_btree'); ERROR: relation "test_bigm_btree" is not a GIN index +SELECT * FROM pg_gin_pending_cleanup('test_bigm_btree'); +ERROR: relation "test_bigm_btree" is not a GIN index DROP INDEX test_bigm_btree; +-- tests for permission checks of pg_gin_pending_cleanup +CREATE ROLE test_bigm2_owner WITH NOSUPERUSER LOGIN; +CREATE ROLE test_bigm2_user WITH NOSUPERUSER LOGIN; +SET SESSION AUTHORIZATION test_bigm2_owner; +CREATE TABLE test_bigm2 (col1 text, col2 text); +CREATE INDEX test_bigm2_idx ON test_bigm2 + USING gin (col1 gin_bigm_ops, col2 gin_bigm_ops); +\copy test_bigm2 from 'data/bigm.csv' with csv +SELECT * FROM pg_gin_pending_cleanup('test_bigm2_idx'); + pg_gin_pending_cleanup +------------------------ + 43 +(1 row) + +SET SESSION AUTHORIZATION test_bigm2_user; +SELECT * FROM pg_gin_pending_cleanup('test_bigm2_idx'); +ERROR: must be owner of relation test_bigm2_idx +RESET SESSION AUTHORIZATION; +DROP TABLE test_bigm2; +DROP ROLE test_bigm2_owner; +DROP ROLE test_bigm2_user; -- tests for full-text search EXPLAIN (COSTS off) SELECT * FROM test_bigm WHERE col1 LIKE likequery('a'); QUERY PLAN diff --git a/html/pg_bigm-1-2.html b/html/pg_bigm-1-2.html index 7c2bdea..225cfcb 100644 --- a/html/pg_bigm-1-2.html +++ b/html/pg_bigm-1-2.html @@ -408,6 +408,36 @@ GINインデックスの待機リストとFASTUPDATEオプションの詳細は (1 row) +

pg_gin_pending_cleanup

+

pg_gin_pending_cleanupは、待機リストからGINインデックス(引数1)にデータを一括移動することで、待機リストを整理する関数です。

+ + + +

+FASTUPDATEオプション無効で作成されたGINインデックスが引数1の場合、そのGINインデックスは待機リストを持たないため、待機リストの整理は発生せず、戻り値1は0となることに注意してください。 +GINインデックスの待機リストとFASTUPDATEオプションの詳細は、GIN高速更新手法を参照してください。 +

+

+引数2がTRUEの場合、この関数は、VACUUM遅延を使って待機リストの整理による負荷を平滑化します。 +引数2がFALSEもしくは未指定の場合は、この関数は全速力で待機リストを整理します。 +

+

+この関数を実行するには、GINインデックスの所有者もしくはスーパーユーザでなければなりません。 +この関数はリカバリ中には実行できません。 +

+ +
+=# SELECT pg_gin_pending_cleanup('pg_tools_idx');
+ pg_gin_pending_cleanup 
+------------------------
+                      1
+(1 row)
+
+

パラメータ

pg_bigm.last_update

pg_bigm.last_updateは、pg_bigmモジュールの最終更新日付を報告するパラメータです。このパラメータは読み取り専用です。 postgresql.confやSET文で設定値を変更することはできません。

diff --git a/html/pg_bigm_en-1-2.html b/html/pg_bigm_en-1-2.html index 08934a2..7fe3dfd 100644 --- a/html/pg_bigm_en-1-2.html +++ b/html/pg_bigm_en-1-2.html @@ -405,6 +405,36 @@ Please see pg_gin_pending_cleanup +

pg_gin_pending_cleanup is a function that cleans up the pending list of GIN index (argument #1) by moving tuples in it to the main GIN data structure in bulk.

+ +
+ +

+Note that the cleanup does not happen and the return value #1 is 0 if the argument #1 is the GIN index built with FASTUPDATE option disabled because it doesn't have a pending list. +Please see GIN Fast Update Technique for details of the pending list and FASTUPDATE option. +

+

+If the argument #2 is TRUE, this function uses the cost-based VACUUM delay to reduce the performance impact of the pending list cleanup. +If the argument #2 is FALSE or not supplied, this function cleans up the pending list as quickly as possible. +

+

+Only superusers and the owner of the GIN index can execute this function. +This function cannot be executed during recovery. +

+ +
+=# SELECT pg_gin_pending_cleanup('pg_tools_idx');
+ pg_gin_pending_cleanup 
+------------------------
+                      1
+(1 row)
+
+

Parameters

pg_bigm.last_update

pg_bigm.last_update is a parameter that reports the last updated date of the pg_bigm module. This parameter is read-only. You cannot change the value of this parameter at all.

diff --git a/pg_bigm--1.2.sql b/pg_bigm--1.2.sql index a5aa2bf..e34a10d 100644 --- a/pg_bigm--1.2.sql +++ b/pg_bigm--1.2.sql @@ -74,6 +74,11 @@ RETURNS record AS 'MODULE_PATHNAME' LANGUAGE C STRICT IMMUTABLE; +CREATE FUNCTION pg_gin_pending_cleanup(index regclass, delay boolean DEFAULT false) +RETURNS int4 +AS 'MODULE_PATHNAME' +LANGUAGE C STRICT IMMUTABLE; + /* triConsistent function is available only in 9.4 or later */ DO $$ DECLARE diff --git a/sql/pg_bigm.sql b/sql/pg_bigm.sql index 76d186d..c973531 100644 --- a/sql/pg_bigm.sql +++ b/sql/pg_bigm.sql @@ -37,15 +37,36 @@ CREATE INDEX test_bigm_idx ON test_bigm \copy test_bigm from 'data/bigm.csv' with csv --- tests pg_gin_pending_stats +-- tests pg_gin_pending_stats and pg_gin_pending_cleanup SELECT * FROM pg_gin_pending_stats('test_bigm_idx'); -VACUUM; +SELECT pg_gin_pending_cleanup('test_bigm_idx'); SELECT * FROM pg_gin_pending_stats('test_bigm_idx'); -SELECT * FROM pg_gin_pending_stats('test_bigm'); + CREATE INDEX test_bigm_btree ON test_bigm USING btree (col2); SELECT * FROM pg_gin_pending_stats('test_bigm_btree'); +SELECT * FROM pg_gin_pending_cleanup('test_bigm_btree'); DROP INDEX test_bigm_btree; +-- tests for permission checks of pg_gin_pending_cleanup +CREATE ROLE test_bigm2_owner WITH NOSUPERUSER LOGIN; +CREATE ROLE test_bigm2_user WITH NOSUPERUSER LOGIN; +SET SESSION AUTHORIZATION test_bigm2_owner; + +CREATE TABLE test_bigm2 (col1 text, col2 text); +CREATE INDEX test_bigm2_idx ON test_bigm2 + USING gin (col1 gin_bigm_ops, col2 gin_bigm_ops); + +\copy test_bigm2 from 'data/bigm.csv' with csv + +SELECT * FROM pg_gin_pending_cleanup('test_bigm2_idx'); +SET SESSION AUTHORIZATION test_bigm2_user; +SELECT * FROM pg_gin_pending_cleanup('test_bigm2_idx'); +RESET SESSION AUTHORIZATION; + +DROP TABLE test_bigm2; +DROP ROLE test_bigm2_owner; +DROP ROLE test_bigm2_user; + -- tests for full-text search EXPLAIN (COSTS off) SELECT * FROM test_bigm WHERE col1 LIKE likequery('a'); EXPLAIN (COSTS off) SELECT * FROM test_bigm WHERE col1 LIKE likequery('am'); -- 2.11.0