From d0b296b6211737acbec89a7c3750fbbcbbd1750c Mon Sep 17 00:00:00 2001 From: MasaoFujii Date: Fri, 24 Jul 2015 23:29:12 +0900 Subject: [PATCH] Support triConsistent function for better performance. The triConsistent function makes the full-text search using pg_bigm much faster when the search keyword has a rare key. This commit is inspired by the change of pg_trgm: 97f3014647a5bd570032abd2b809d3233003f13f This commit bumps the pg_bigm version to 1.2. Since the triConsistent mechanism is available only in PostgreSQL 9.4 or later, this commit drops the support for 9.1, 9.2 and 9.3. However, per discussion, we concluded that we can live with this situation because the current stable version of pg_bigm, i.e., 1.1, continues to be maintained and to support those old PostgreSQL versions. Reviewed by Sawada Masahiko. --- Makefile | 2 +- bigm_gin.c | 78 ++++++++++++++++++++++++++++++------ bigm_op.c | 13 ------ pg_bigm--1.1--1.2.sql | 10 +++++ pg_bigm--1.1.sql => pg_bigm--1.2.sql | 6 +++ pg_bigm.control | 2 +- 6 files changed, 83 insertions(+), 28 deletions(-) create mode 100644 pg_bigm--1.1--1.2.sql rename pg_bigm--1.1.sql => pg_bigm--1.2.sql (89%) diff --git a/Makefile b/Makefile index f7b3d29..de988ab 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ MODULE_big = pg_bigm OBJS = bigm_op.o bigm_gin.o EXTENSION = pg_bigm -DATA = pg_bigm--1.1.sql pg_bigm--1.0--1.1.sql +DATA = pg_bigm--1.2.sql pg_bigm--1.1--1.2.sql pg_bigm--1.0--1.1.sql PGFILEDESC = "pg_bigm - bigram matching" REGRESS = pg_bigm pg_bigm_ja diff --git a/bigm_gin.c b/bigm_gin.c index cf7caf8..ca32883 100644 --- a/bigm_gin.c +++ b/bigm_gin.c @@ -32,22 +32,10 @@ PG_FUNCTION_INFO_V1(gin_extract_value_bigm); PG_FUNCTION_INFO_V1(gin_extract_query_bigm); PG_FUNCTION_INFO_V1(gin_bigm_consistent); +PG_FUNCTION_INFO_V1(gin_bigm_triconsistent); PG_FUNCTION_INFO_V1(gin_bigm_compare_partial); PG_FUNCTION_INFO_V1(pg_gin_pending_stats); -/* - * The function prototypes are created as a part of PG_FUNCTION_INFO_V1 - * macro since 
9.4, and hence the declaration of the function prototypes - * here is necessary only for 9.3 or before. - */ -#if PG_VERSION_NUM < 90400 -Datum gin_extract_value_bigm(PG_FUNCTION_ARGS); -Datum gin_extract_query_bigm(PG_FUNCTION_ARGS); -Datum gin_bigm_consistent(PG_FUNCTION_ARGS); -Datum gin_bigm_compare_partial(PG_FUNCTION_ARGS); -Datum pg_gin_pending_stats(PG_FUNCTION_ARGS); -#endif - Datum gin_extract_value_bigm(PG_FUNCTION_ARGS) { @@ -260,6 +248,70 @@ gin_bigm_consistent(PG_FUNCTION_ARGS) } Datum +gin_bigm_triconsistent(PG_FUNCTION_ARGS) +{ + GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0); + StrategyNumber strategy = PG_GETARG_UINT16(1); + + /* text *query = PG_GETARG_TEXT_P(2); -- argument 2 (the query text) is not used here */ + int32 nkeys = PG_GETARG_INT32(3); + Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); + GinTernaryValue res = GIN_MAYBE; + int32 i, + ntrue; + + switch (strategy) + { + case LikeStrategyNumber: + /* + * Don't recheck the heap tuple against the query (i.e., answer GIN_TRUE + * rather than GIN_MAYBE) if either pg_bigm.enable_recheck is disabled or the + * search word is the special one so that the index can return the exact result. + */ + res = (bigm_enable_recheck && + (*((bool *) extra_data) || (nkeys != 1))) ? + GIN_MAYBE : GIN_TRUE; + + /* All extracted bigrams must be present; a single GIN_FALSE entry rules out a match. */ + for (i = 0; i < nkeys; i++) + { + if (check[i] == GIN_FALSE) + { + res = GIN_FALSE; + break; + } + } + break; + case SimilarityStrategyNumber: + /* Count the matches; an entry that is not GIN_FALSE (GIN_TRUE or GIN_MAYBE) is counted */ + ntrue = 0; + for (i = 0; i < nkeys; i++) + { + if (check[i] != GIN_FALSE) + ntrue++; + } +#ifdef DIVUNION + res = (nkeys == ntrue) ? GIN_MAYBE : + (((((float4) ntrue) / ((float4) (nkeys - ntrue))) >= + (float4) bigm_similarity_limit) ? GIN_MAYBE : GIN_FALSE); +#else + res = (nkeys == 0) ? GIN_FALSE : + (((((float4) ntrue) / ((float4) nkeys)) >= + (float4) bigm_similarity_limit) ? 
GIN_MAYBE : GIN_FALSE); +#endif + if (res != GIN_FALSE && !bigm_enable_recheck) + res = GIN_TRUE; + break; + default: + elog(ERROR, "unrecognized strategy number: %d", strategy); + res = GIN_FALSE; /* keep compiler quiet */ + break; + } + + PG_RETURN_GIN_TERNARY_VALUE(res); +} + +Datum gin_bigm_compare_partial(PG_FUNCTION_ARGS) { text *arg1 = PG_GETARG_TEXT_PP(0); diff --git a/bigm_op.c b/bigm_op.c index 93c77d2..82b4be2 100644 --- a/bigm_op.c +++ b/bigm_op.c @@ -38,19 +38,6 @@ PG_FUNCTION_INFO_V1(likequery); PG_FUNCTION_INFO_V1(bigm_similarity); PG_FUNCTION_INFO_V1(bigm_similarity_op); -/* - * The function prototypes are created as a part of PG_FUNCTION_INFO_V1 - * macro since 9.4, and hence the declaration of the function prototypes - * here is necessary only for 9.3 or before. - */ -#if PG_VERSION_NUM < 90400 -Datum show_bigm(PG_FUNCTION_ARGS); -Datum bigmtextcmp(PG_FUNCTION_ARGS); -Datum likequery(PG_FUNCTION_ARGS); -Datum bigm_similarity(PG_FUNCTION_ARGS); -Datum bigm_similarity_op(PG_FUNCTION_ARGS); -#endif - void _PG_init(void); void _PG_fini(void); diff --git a/pg_bigm--1.1--1.2.sql b/pg_bigm--1.1--1.2.sql new file mode 100644 index 0000000..dfbf5cc --- /dev/null +++ b/pg_bigm--1.1--1.2.sql @@ -0,0 +1,10 @@ +-- complain if script is sourced in psql, rather than via ALTER EXTENSION +\echo Use "ALTER EXTENSION pg_bigm UPDATE TO '1.2'" to load this file. 
\quit + +CREATE FUNCTION gin_bigm_triconsistent(internal, int2, text, int4, internal, internal, internal) +RETURNS "char" +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + +ALTER OPERATOR FAMILY gin_bigm_ops USING gin ADD + FUNCTION 6 gin_bigm_triconsistent (internal, int2, text, int4, internal, internal, internal); diff --git a/pg_bigm--1.1.sql b/pg_bigm--1.2.sql similarity index 89% rename from pg_bigm--1.1.sql rename to pg_bigm--1.2.sql index 196be9e..06f35c1 100644 --- a/pg_bigm--1.1.sql +++ b/pg_bigm--1.2.sql @@ -41,6 +41,11 @@ RETURNS bool AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT; +CREATE FUNCTION gin_bigm_triconsistent(internal, int2, text, int4, internal, internal, internal) +RETURNS "char" +AS 'MODULE_PATHNAME' +LANGUAGE C IMMUTABLE STRICT; + CREATE FUNCTION gin_bigm_compare_partial(text, text, int2, internal) RETURNS bool AS 'MODULE_PATHNAME' @@ -62,6 +67,7 @@ AS FUNCTION 3 gin_extract_query_bigm (text, internal, int2, internal, internal, internal, internal), FUNCTION 4 gin_bigm_consistent (internal, int2, text, int4, internal, internal, internal, internal), FUNCTION 5 gin_bigm_compare_partial (text, text, int2, internal), + FUNCTION 6 gin_bigm_triconsistent (internal, int2, text, int4, internal, internal, internal), STORAGE text; CREATE FUNCTION likequery(text) diff --git a/pg_bigm.control b/pg_bigm.control index ec861e2..17c81f0 100644 --- a/pg_bigm.control +++ b/pg_bigm.control @@ -1,5 +1,5 @@ # pg_bigm extension comment = 'text similarity measurement and index searching based on bigrams' -default_version = '1.1' +default_version = '1.2' module_pathname = '$libdir/pg_bigm' relocatable = true -- 2.11.0