OSDN Git Service

Support triConsistent function for better performance.
authorMasaoFujii <masao.fujii@gmail.com>
Fri, 24 Jul 2015 14:29:12 +0000 (23:29 +0900)
committerMasaoFujii <masao.fujii@gmail.com>
Fri, 24 Jul 2015 14:29:12 +0000 (23:29 +0900)
The triConsistent function makes full-text search with pg_bigm
much faster when the search keyword contains a rare key.

This commit is inspired by the following change to pg_trgm:
97f3014647a5bd570032abd2b809d3233003f13f

This commit bumps the pg_bigm version to 1.2.

Since the triConsistent mechanism is available only in PostgreSQL 9.4
or later, this commit drops support for 9.1, 9.2 and 9.3.
However, per discussion, we concluded that we can live with
this situation because the current stable version of pg_bigm,
i.e., 1.1, continues to be maintained and to support those old
PostgreSQL versions.

Reviewed by Sawada Masahiko.

Makefile
bigm_gin.c
bigm_op.c
pg_bigm--1.1--1.2.sql [new file with mode: 0644]
pg_bigm--1.2.sql [moved from pg_bigm--1.1.sql with 89% similarity]
pg_bigm.control

index f7b3d29..de988ab 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@ MODULE_big = pg_bigm
 OBJS = bigm_op.o bigm_gin.o
 
 EXTENSION = pg_bigm
-DATA = pg_bigm--1.1.sql pg_bigm--1.0--1.1.sql
+DATA = pg_bigm--1.2.sql pg_bigm--1.1--1.2.sql pg_bigm--1.0--1.1.sql
 PGFILEDESC = "pg_bigm - bigram matching"
 
 REGRESS = pg_bigm pg_bigm_ja
index cf7caf8..ca32883 100644 (file)
 PG_FUNCTION_INFO_V1(gin_extract_value_bigm);
 PG_FUNCTION_INFO_V1(gin_extract_query_bigm);
 PG_FUNCTION_INFO_V1(gin_bigm_consistent);
+PG_FUNCTION_INFO_V1(gin_bigm_triconsistent);
 PG_FUNCTION_INFO_V1(gin_bigm_compare_partial);
 PG_FUNCTION_INFO_V1(pg_gin_pending_stats);
 
-/*
- * The function prototypes are created as a part of PG_FUNCTION_INFO_V1
- * macro since 9.4, and hence the declaration of the function prototypes
- * here is necessary only for 9.3 or before.
- */
-#if PG_VERSION_NUM < 90400
-Datum          gin_extract_value_bigm(PG_FUNCTION_ARGS);
-Datum          gin_extract_query_bigm(PG_FUNCTION_ARGS);
-Datum          gin_bigm_consistent(PG_FUNCTION_ARGS);
-Datum          gin_bigm_compare_partial(PG_FUNCTION_ARGS);
-Datum          pg_gin_pending_stats(PG_FUNCTION_ARGS);
-#endif
-
 Datum
 gin_extract_value_bigm(PG_FUNCTION_ARGS)
 {
@@ -260,6 +248,70 @@ gin_bigm_consistent(PG_FUNCTION_ARGS)
 }
 
 Datum
+gin_bigm_triconsistent(PG_FUNCTION_ARGS)
+{
+       GinTernaryValue  *check = (GinTernaryValue *) PG_GETARG_POINTER(0);
+       StrategyNumber strategy = PG_GETARG_UINT16(1);
+       /* Ternary consistency check: maps check[] to GIN_TRUE, GIN_MAYBE or GIN_FALSE. */
+       /* Argument 2 (the query text) is not used: text *query = PG_GETARG_TEXT_P(2); */
+       int32           nkeys = PG_GETARG_INT32(3);
+       Pointer    *extra_data = (Pointer *) PG_GETARG_POINTER(4);
+       GinTernaryValue res = GIN_MAYBE;
+       int32           i,
+                               ntrue;
+
+       switch (strategy)
+       {
+               case LikeStrategyNumber:
+                       /*
+                        * Return GIN_TRUE (no heap recheck) if pg_bigm.enable_recheck is
+                        * disabled, or if the search word is the special one (a single key
+                        * with the extra_data flag unset) for which the index is exact.
+                        */
+                       res = (bigm_enable_recheck &&
+                                  (*((bool *) extra_data) || (nkeys != 1))) ?
+                               GIN_MAYBE : GIN_TRUE;
+
+                       /* Any key known to be absent (GIN_FALSE) rules the tuple out. */
+                       for (i = 0; i < nkeys; i++)
+                       {
+                               if (check[i] == GIN_FALSE)
+                               {
+                                       res = GIN_FALSE;
+                                       break;
+                               }
+                       }
+                       break;
+               case SimilarityStrategyNumber:
+                       /* Count the keys that are present or possibly present (not GIN_FALSE). */
+                       ntrue = 0;
+                       for (i = 0; i < nkeys; i++)
+                       {
+                               if (check[i] != GIN_FALSE)
+                                       ntrue++;
+                       }
+#ifdef DIVUNION
+                       res = (nkeys == ntrue) ? GIN_MAYBE :
+                               (((((float4) ntrue) / ((float4) (nkeys - ntrue))) >=
+                                 (float4) bigm_similarity_limit) ? GIN_MAYBE : GIN_FALSE);
+#else
+                       res = (nkeys == 0) ? GIN_FALSE :
+                               (((((float4) ntrue) / ((float4) nkeys)) >=
+                                 (float4) bigm_similarity_limit) ? GIN_MAYBE : GIN_FALSE);
+#endif
+                       if (res != GIN_FALSE && !bigm_enable_recheck)
+                               res = GIN_TRUE;         /* recheck disabled: report a definite match */
+                       break;
+               default:
+                       elog(ERROR, "unrecognized strategy number: %d", strategy);
+                       res = GIN_FALSE;                /* keep compiler quiet */
+                       break;
+       }
+
+       PG_RETURN_GIN_TERNARY_VALUE(res);
+}
+
+Datum
 gin_bigm_compare_partial(PG_FUNCTION_ARGS)
 {
        text       *arg1 = PG_GETARG_TEXT_PP(0);
index 93c77d2..82b4be2 100644 (file)
--- a/bigm_op.c
+++ b/bigm_op.c
@@ -38,19 +38,6 @@ PG_FUNCTION_INFO_V1(likequery);
 PG_FUNCTION_INFO_V1(bigm_similarity);
 PG_FUNCTION_INFO_V1(bigm_similarity_op);
 
-/*
- * The function prototypes are created as a part of PG_FUNCTION_INFO_V1
- * macro since 9.4, and hence the declaration of the function prototypes
- * here is necessary only for 9.3 or before.
- */
-#if PG_VERSION_NUM < 90400
-Datum          show_bigm(PG_FUNCTION_ARGS);
-Datum          bigmtextcmp(PG_FUNCTION_ARGS);
-Datum          likequery(PG_FUNCTION_ARGS);
-Datum          bigm_similarity(PG_FUNCTION_ARGS);
-Datum          bigm_similarity_op(PG_FUNCTION_ARGS);
-#endif
-
 void           _PG_init(void);
 void           _PG_fini(void);
 
diff --git a/pg_bigm--1.1--1.2.sql b/pg_bigm--1.1--1.2.sql
new file mode 100644 (file)
index 0000000..dfbf5cc
--- /dev/null
@@ -0,0 +1,10 @@
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION pg_bigm UPDATE TO '1.2'" to load this file. \quit
+
+CREATE FUNCTION gin_bigm_triconsistent(internal, int2, text, int4, internal, internal, internal)
+RETURNS "char"
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE STRICT;
+
+ALTER OPERATOR FAMILY gin_bigm_ops USING gin ADD
+        FUNCTION        6       gin_bigm_triconsistent (internal, int2, text, int4, internal, internal, internal);
similarity index 89%
rename from pg_bigm--1.1.sql
rename to pg_bigm--1.2.sql
index 196be9e..06f35c1 100644 (file)
@@ -41,6 +41,11 @@ RETURNS bool
 AS 'MODULE_PATHNAME'
 LANGUAGE C IMMUTABLE STRICT;
 
+CREATE FUNCTION gin_bigm_triconsistent(internal, int2, text, int4, internal, internal, internal)
+RETURNS "char"
+AS 'MODULE_PATHNAME'
+LANGUAGE C IMMUTABLE STRICT;
+
 CREATE FUNCTION gin_bigm_compare_partial(text, text, int2, internal)
 RETURNS bool
 AS 'MODULE_PATHNAME'
@@ -62,6 +67,7 @@ AS
         FUNCTION        3       gin_extract_query_bigm (text, internal, int2, internal, internal, internal, internal),
         FUNCTION        4       gin_bigm_consistent (internal, int2, text, int4, internal, internal, internal, internal),
         FUNCTION        5       gin_bigm_compare_partial (text, text, int2, internal),
+        FUNCTION        6       gin_bigm_triconsistent (internal, int2, text, int4, internal, internal, internal),
         STORAGE         text;
 
 CREATE FUNCTION likequery(text)
index ec861e2..17c81f0 100644 (file)
@@ -1,5 +1,5 @@
 # pg_bigm extension
 comment = 'text similarity measurement and index searching based on bigrams'
-default_version = '1.1'
+default_version = '1.2'
 module_pathname = '$libdir/pg_bigm'
 relocatable = true