OSDN Git Service

Switch CI from Travis CI to GitHub Actions (#12)
[pgbigm/pg_bigm.git] / bigm_gin.c
index 5bcbd4b..53d7198 100644 (file)
@@ -1,11 +1,13 @@
 /*-------------------------------------------------------------------------
  *
+ * Portions Copyright (c) 2017-2023, pg_bigm Development Group
+ * Portions Copyright (c) 2013-2016, NTT DATA Corporation
  * Portions Copyright (c) 2007-2012, PostgreSQL Global Development Group
  *
  * Changelog:
- *   2013/01/09
- *   Support full text search using bigrams.
- *   Author: NTT DATA Corporation
+ *      2013/01/09
+ *      Support full text search using bigrams.
+ *      Author: NTT DATA Corporation
  *
  *-------------------------------------------------------------------------
  */
 #include "access/gin.h"
 #include "access/gin_private.h"
 #include "access/itup.h"
+#if PG_VERSION_NUM >= 120000
+#include "access/relation.h"
+#endif
 #include "access/skey.h"
+#if PG_VERSION_NUM < 130000
 #include "access/tuptoaster.h"
+#endif
+#include "access/xlog.h"
+#if PG_VERSION_NUM > 90500
+#include "catalog/pg_am.h"
+#endif
 #include "catalog/pg_type.h"
 #include "funcapi.h"
 #include "mb/pg_wchar.h"
 #include "tsearch/ts_locale.h"
 #include "utils/array.h"
 #include "utils/builtins.h"
+#include "utils/rel.h"
 
 
 PG_FUNCTION_INFO_V1(gin_extract_value_bigm);
-Datum          gin_extract_value_bigm(PG_FUNCTION_ARGS);
-
 PG_FUNCTION_INFO_V1(gin_extract_query_bigm);
-Datum          gin_extract_query_bigm(PG_FUNCTION_ARGS);
-
 PG_FUNCTION_INFO_V1(gin_bigm_consistent);
-Datum          gin_bigm_consistent(PG_FUNCTION_ARGS);
-
 PG_FUNCTION_INFO_V1(gin_bigm_compare_partial);
-Datum          gin_bigm_compare_partial(PG_FUNCTION_ARGS);
-
 PG_FUNCTION_INFO_V1(pg_gin_pending_stats);
+
+/* triConsistent function is available only in 9.4 or later */
+#if PG_VERSION_NUM >= 90400
+PG_FUNCTION_INFO_V1(gin_bigm_triconsistent);
+#endif
+
+/*
+ * The function prototypes are created as a part of PG_FUNCTION_INFO_V1
+ * macro since 9.4, and hence the declaration of the function prototypes
+ * here is necessary only for 9.3 or before.
+ */
+#if PG_VERSION_NUM < 90400
+Datum          gin_extract_value_bigm(PG_FUNCTION_ARGS);
+Datum          gin_extract_query_bigm(PG_FUNCTION_ARGS);
+Datum          gin_bigm_consistent(PG_FUNCTION_ARGS);
+Datum          gin_bigm_compare_partial(PG_FUNCTION_ARGS);
 Datum          pg_gin_pending_stats(PG_FUNCTION_ARGS);
+#endif
 
 Datum
 gin_extract_value_bigm(PG_FUNCTION_ARGS)
@@ -68,7 +89,8 @@ gin_extract_value_bigm(PG_FUNCTION_ARGS)
                ptr = GETARR(bgm);
                for (i = 0; i < bgmlen; i++)
                {
-                       text            *item = cstring_to_text_with_len(ptr->str, ptr->bytelen);
+                       text       *item = cstring_to_text_with_len(ptr->str, ptr->bytelen);
+
                        entries[i] = PointerGetDatum(item);
                        ptr++;
                }
@@ -84,8 +106,9 @@ gin_extract_query_bigm(PG_FUNCTION_ARGS)
        int32      *nentries = (int32 *) PG_GETARG_POINTER(1);
        StrategyNumber strategy = PG_GETARG_UINT16(2);
 
-       bool   **pmatch = (bool **) PG_GETARG_POINTER(3);
-       Pointer   **extra_data = (Pointer **) PG_GETARG_POINTER(4);
+       bool      **pmatch = (bool **) PG_GETARG_POINTER(3);
+       Pointer   **extra_data = (Pointer **) PG_GETARG_POINTER(4);
+
        /* bool   **nullFlags = (bool **) PG_GETARG_POINTER(5); */
        int32      *searchMode = (int32 *) PG_GETARG_POINTER(6);
        Datum      *entries = NULL;
@@ -99,9 +122,9 @@ gin_extract_query_bigm(PG_FUNCTION_ARGS)
        {
                case LikeStrategyNumber:
                {
-                       char    *str = VARDATA(val);
-                       int             slen = VARSIZE(val) - VARHDRSZ;
-                       bool    *recheck;
+                       char       *str = VARDATA(val);
+                       int                     slen = VARSIZE(val) - VARHDRSZ;
+                       bool       *recheck;
 
                        /*
                         * For wildcard search we extract all the bigrams that every
@@ -111,17 +134,18 @@ gin_extract_query_bigm(PG_FUNCTION_ARGS)
                        bgmlen = ARRNELEM(bgm);
 
                        /*
-                        * Check whether the heap tuple fetched by index search needs to be
-                        * rechecked against the query. If the search word consists of one
-                        * or two characters and doesn't contain any space character, we can
-                        * guarantee that the index test would be exact. That is, the heap
-                        * tuple does match the query, so it doesn't need to be rechecked.
+                        * Check whether the heap tuple fetched by index search needs to
+                        * be rechecked against the query. If the search word consists of
+                        * one or two characters and doesn't contain any space character,
+                        * we can guarantee that the index test would be exact. That is,
+                        * the heap tuple does match the query, so it doesn't need to be
+                        * rechecked.
                         */
                        *extra_data = (Pointer *) palloc(sizeof(bool));
                        recheck = (bool *) *extra_data;
                        if (bgmlen == 1 && !removeDups)
                        {
-                               const char      *sp;
+                               const char *sp;
 
                                *recheck = false;
                                for (sp = str; (sp - str) < slen;)
@@ -139,6 +163,12 @@ gin_extract_query_bigm(PG_FUNCTION_ARGS)
                                *recheck = true;
                        break;
                }
+               case SimilarityStrategyNumber:
+               {
+                       bgm = generate_bigm(VARDATA(val), VARSIZE(val) - VARHDRSZ);
+                       bgmlen = ARRNELEM(bgm);
+                       break;
+               }
                default:
                        elog(ERROR, "unrecognized strategy number: %d", strategy);
                        bgm = NULL;                     /* keep compiler quiet */
@@ -155,7 +185,7 @@ gin_extract_query_bigm(PG_FUNCTION_ARGS)
                ptr = GETARR(bgm);
                for (i = 0; i < *nentries; i++)
                {
-                       text            *item;
+                       text       *item;
 
                        if (ptr->pmatch)
                        {
@@ -187,25 +217,25 @@ gin_bigm_consistent(PG_FUNCTION_ARGS)
        /* text    *query = PG_GETARG_TEXT_P(2); */
        int32           nkeys = PG_GETARG_INT32(3);
 
-       Pointer   *extra_data = (Pointer *) PG_GETARG_POINTER(4);
+       Pointer    *extra_data = (Pointer *) PG_GETARG_POINTER(4);
        bool       *recheck = (bool *) PG_GETARG_POINTER(5);
        bool            res;
        int32           i;
-
-       Assert(nkeys > 0);
-
-       /*
-        * Don't recheck the heap tuple against the query if either
-        * pg_bigm.enable_recheck is disabled or the search word is
-        * the special one so that the index can return the exact
-        * result.
-        */
-       *recheck = bigm_enable_recheck &&
-               ((nkeys > 1) || *((bool *) extra_data));
+       int32           ntrue;
 
        switch (strategy)
        {
                case LikeStrategyNumber:
+
+                       /*
+                        * Don't recheck the heap tuple against the query if either
+                        * pg_bigm.enable_recheck is disabled or the search word is the
+                        * special one so that the index can return the exact result.
+                        */
+                       Assert(extra_data != NULL);
+                       *recheck = bigm_enable_recheck &&
+                               (*((bool *) extra_data) || (nkeys != 1));
+
                        /* Check if all extracted bigrams are presented. */
                        res = true;
                        for (i = 0; i < nkeys; i++)
@@ -217,6 +247,35 @@ gin_bigm_consistent(PG_FUNCTION_ARGS)
                                }
                        }
                        break;
+               case SimilarityStrategyNumber:
+                       /* Count the matches */
+                       *recheck = bigm_enable_recheck;
+                       ntrue = 0;
+                       for (i = 0; i < nkeys; i++)
+                       {
+                               if (check[i])
+                                       ntrue++;
+                       }
+
+                       /*--------------------
+                        * If DIVUNION is defined then similarity formula is:
+                        * c / (len1 + len2 - c)
+                        * where c is number of common bigrams and it stands as ntrue in
+                        * this code.  Here we don't know value of len2 but we can assume
+                        * that c (ntrue) is a lower bound of len2, so upper bound of
+                        * similarity is:
+                        * c / (len1 + c - c)  => c / len1
+                        * If DIVUNION is not defined then similarity formula is:
+                        * c / max(len1, len2)
+                        * And again, c (ntrue) is a lower bound of len2, but c <= len1
+                        * just by definition and, consequently, upper bound of
+                        * similarity is just c / len1.
+                        * So, independently of DIVUNION, the upper bound formula is the same.
+                        */
+                       res = (nkeys == 0) ? false :
+                               ((((float4) ntrue) / ((float4) nkeys)) >=
+                                 (float4) bigm_similarity_limit);
+                       break;
                default:
                        elog(ERROR, "unrecognized strategy number: %d", strategy);
                        res = false;            /* keep compiler quiet */
@@ -226,16 +285,82 @@ gin_bigm_consistent(PG_FUNCTION_ARGS)
        PG_RETURN_BOOL(res);
 }
 
+/* Ternary-logic GIN consistent check; triConsistent exists only in 9.4 or later */
+#if PG_VERSION_NUM >= 90400
+Datum
+gin_bigm_triconsistent(PG_FUNCTION_ARGS)
+{
+       GinTernaryValue  *check = (GinTernaryValue *) PG_GETARG_POINTER(0);
+       StrategyNumber strategy = PG_GETARG_UINT16(1);
+
+       /* text    *query = PG_GETARG_TEXT_P(2); */
+       int32           nkeys = PG_GETARG_INT32(3);
+       Pointer    *extra_data = (Pointer *) PG_GETARG_POINTER(4);
+       GinTernaryValue res = GIN_MAYBE;
+       int32           i,
+                               ntrue;
+
+       switch (strategy)
+       {
+               case LikeStrategyNumber:
+                       /*
+                        * Report GIN_MAYBE only when pg_bigm.enable_recheck is on and
+                        * either the bool read through extra_data is set or nkeys is
+                        * not 1; otherwise start from GIN_TRUE.
+                        */
+                       res = (bigm_enable_recheck &&
+                                  (*((bool *) extra_data) || (nkeys != 1))) ?
+                               GIN_MAYBE : GIN_TRUE;
+
+                       /* A single GIN_FALSE key means not all bigrams are present. */
+                       for (i = 0; i < nkeys; i++)
+                       {
+                               if (check[i] == GIN_FALSE)
+                               {
+                                       res = GIN_FALSE;
+                                       break;
+                               }
+                       }
+                       break;
+               case SimilarityStrategyNumber:
+                       /* Count keys that are not GIN_FALSE; GIN_MAYBE counts as a match */
+                       ntrue = 0;
+                       for (i = 0; i < nkeys; i++)
+                       {
+                               if (check[i] != GIN_FALSE)
+                                       ntrue++;
+                       }
+
+                       /*
+                        * Threshold test on ntrue / nkeys; see gin_bigm_consistent() for why.
+                        */
+                       res = (nkeys == 0) ? GIN_FALSE :
+                               (((((float4) ntrue) / ((float4) nkeys)) >=
+                                 (float4) bigm_similarity_limit) ? GIN_MAYBE : GIN_FALSE);
+
+                       if (res != GIN_FALSE && !bigm_enable_recheck)
+                               res = GIN_TRUE; /* recheck disabled: never answer GIN_MAYBE */
+                       break;
+               default:
+                       elog(ERROR, "unrecognized strategy number: %d", strategy);
+                       res = GIN_FALSE;                /* keep compiler quiet */
+                       break;
+       }
+
+       PG_RETURN_GIN_TERNARY_VALUE(res);
+}
+#endif /* PG_VERSION_NUM >= 90400 */
+
 Datum
 gin_bigm_compare_partial(PG_FUNCTION_ARGS)
 {
-       text    *arg1 = PG_GETARG_TEXT_PP(0);
-       text    *arg2 = PG_GETARG_TEXT_PP(1);
-       char    *a1p;
-       char    *a2p;
-       int             mblen1;
-       int             mblen2;
-       int             res;
+       text       *arg1 = PG_GETARG_TEXT_PP(0);
+       text       *arg2 = PG_GETARG_TEXT_PP(1);
+       char       *a1p;
+       char       *a2p;
+       int                     mblen1;
+       int                     mblen2;
+       int                     res;
 
        a1p = VARDATA_ANY(arg1);
        a2p = VARDATA_ANY(arg2);
@@ -267,21 +392,42 @@ pg_gin_pending_stats(PG_FUNCTION_ARGS)
        HeapTuple       tuple;
        TupleDesc       tupdesc;
 
+       indexRel = relation_open(indexOid, AccessShareLock);
+
+       if (indexRel->rd_rel->relkind != RELKIND_INDEX ||
+               indexRel->rd_rel->relam != GIN_AM_OID)
+               ereport(ERROR,
+                               (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+                                errmsg("relation \"%s\" is not a GIN index",
+                                               RelationGetRelationName(indexRel))));
+
+       /*
+        * Reject attempts to read non-local temporary relations; we would be
+        * likely to get wrong data since we have no visibility into the owning
+        * session's local buffers.
+        */
+       if (RELATION_IS_OTHER_TEMP(indexRel))
+               ereport(ERROR,
+                               (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+                                errmsg("cannot access temporary indexes of other sessions")));
+
        /*
         * Obtain statistic information from the meta page
         */
-       indexRel = index_open(indexOid, AccessShareLock);
        metabuffer = ReadBuffer(indexRel, GIN_METAPAGE_BLKNO);
        LockBuffer(metabuffer, GIN_SHARE);
        metapage = BufferGetPage(metabuffer);
        metadata = GinPageGetMeta(metapage);
-       index_close(indexRel, AccessShareLock);
 
        /*
-        * Construct a tuple descriptor for the result row. This must
-        * match this function's pg_bigm--x.x.sql entry.
+        * Construct a tuple descriptor for the result row. This must match this
+        * function's pg_bigm--x.x.sql entry.
         */
+ #if PG_VERSION_NUM >= 120000
+       tupdesc = CreateTemplateTupleDesc(2);
+#else
        tupdesc = CreateTemplateTupleDesc(2, false);
+#endif
        TupleDescInitEntry(tupdesc, (AttrNumber) 1,
                                           "pages", INT4OID, -1, 0);
        TupleDescInitEntry(tupdesc, (AttrNumber) 2,
@@ -297,6 +443,7 @@ pg_gin_pending_stats(PG_FUNCTION_ARGS)
        isnull[1] = false;
 
        UnlockReleaseBuffer(metabuffer);
+       relation_close(indexRel, AccessShareLock);
 
        tuple = heap_form_tuple(tupdesc, values, isnull);
        PG_RETURN_DATUM(HeapTupleGetDatum(tuple));