/*-------------------------------------------------------------------------
*
+ * Portions Copyright (c) 2017-2023, pg_bigm Development Group
+ * Portions Copyright (c) 2013-2016, NTT DATA Corporation
* Portions Copyright (c) 2007-2012, PostgreSQL Global Development Group
*
* Changelog:
- * 2013/01/09
- * Support full text search using bigrams.
- * Author: NTT DATA Corporation
+ * 2013/01/09
+ * Support full text search using bigrams.
+ * Author: NTT DATA Corporation
*
*-------------------------------------------------------------------------
*/
#include "access/gin.h"
#include "access/gin_private.h"
#include "access/itup.h"
+#if PG_VERSION_NUM >= 120000
+#include "access/relation.h"
+#endif
#include "access/skey.h"
+#if PG_VERSION_NUM < 130000
#include "access/tuptoaster.h"
+#endif
+#include "access/xlog.h"
+#if PG_VERSION_NUM > 90500
+#include "catalog/pg_am.h"
+#endif
#include "catalog/pg_type.h"
#include "funcapi.h"
#include "mb/pg_wchar.h"
#include "tsearch/ts_locale.h"
#include "utils/array.h"
#include "utils/builtins.h"
+#include "utils/rel.h"
PG_FUNCTION_INFO_V1(gin_extract_value_bigm);
-Datum gin_extract_value_bigm(PG_FUNCTION_ARGS);
-
PG_FUNCTION_INFO_V1(gin_extract_query_bigm);
-Datum gin_extract_query_bigm(PG_FUNCTION_ARGS);
-
PG_FUNCTION_INFO_V1(gin_bigm_consistent);
-Datum gin_bigm_consistent(PG_FUNCTION_ARGS);
-
PG_FUNCTION_INFO_V1(gin_bigm_compare_partial);
-Datum gin_bigm_compare_partial(PG_FUNCTION_ARGS);
-
PG_FUNCTION_INFO_V1(pg_gin_pending_stats);
+
+/* triConsistent function is available only in 9.4 or later */
+#if PG_VERSION_NUM >= 90400
+PG_FUNCTION_INFO_V1(gin_bigm_triconsistent);
+#endif
+
+/*
+ * Since 9.4 the PG_FUNCTION_INFO_V1 macro declares the function prototype
+ * itself, so the explicit prototypes below are needed only for 9.3 or
+ * earlier.
+ */
+#if PG_VERSION_NUM < 90400
+Datum gin_extract_value_bigm(PG_FUNCTION_ARGS);
+Datum gin_extract_query_bigm(PG_FUNCTION_ARGS);
+Datum gin_bigm_consistent(PG_FUNCTION_ARGS);
+Datum gin_bigm_compare_partial(PG_FUNCTION_ARGS);
Datum pg_gin_pending_stats(PG_FUNCTION_ARGS);
+#endif
Datum
gin_extract_value_bigm(PG_FUNCTION_ARGS)
ptr = GETARR(bgm);
for (i = 0; i < bgmlen; i++)
{
- text *item = cstring_to_text_with_len(ptr->str, ptr->bytelen);
+ text *item = cstring_to_text_with_len(ptr->str, ptr->bytelen);
+
entries[i] = PointerGetDatum(item);
ptr++;
}
int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
StrategyNumber strategy = PG_GETARG_UINT16(2);
- bool **pmatch = (bool **) PG_GETARG_POINTER(3);
- Pointer **extra_data = (Pointer **) PG_GETARG_POINTER(4);
+ bool **pmatch = (bool **) PG_GETARG_POINTER(3);
+ Pointer **extra_data = (Pointer **) PG_GETARG_POINTER(4);
+
/* bool **nullFlags = (bool **) PG_GETARG_POINTER(5); */
int32 *searchMode = (int32 *) PG_GETARG_POINTER(6);
Datum *entries = NULL;
{
case LikeStrategyNumber:
{
- char *str = VARDATA(val);
- int slen = VARSIZE(val) - VARHDRSZ;
- bool *recheck;
+ char *str = VARDATA(val);
+ int slen = VARSIZE(val) - VARHDRSZ;
+ bool *recheck;
/*
* For wildcard search we extract all the bigrams that every
bgmlen = ARRNELEM(bgm);
/*
- * Check whether the heap tuple fetched by index search needs to be
- * rechecked against the query. If the search word consists of one
- * or two characters and doesn't contain any space character, we can
- * guarantee that the index test would be exact. That is, the heap
- * tuple does match the query, so it doesn't need to be rechecked.
+ * Check whether the heap tuple fetched by index search needs to
+ * be rechecked against the query. If the search word consists of
+ * one or two characters and doesn't contain any space character,
+ * we can guarantee that the index test would be exact. That is,
+ * the heap tuple does match the query, so it doesn't need to be
+ * rechecked.
*/
*extra_data = (Pointer *) palloc(sizeof(bool));
recheck = (bool *) *extra_data;
if (bgmlen == 1 && !removeDups)
{
- const char *sp;
+ const char *sp;
*recheck = false;
for (sp = str; (sp - str) < slen;)
*recheck = true;
break;
}
+ case SimilarityStrategyNumber:
+ {
+ bgm = generate_bigm(VARDATA(val), VARSIZE(val) - VARHDRSZ);
+ bgmlen = ARRNELEM(bgm);
+ break;
+ }
default:
elog(ERROR, "unrecognized strategy number: %d", strategy);
bgm = NULL; /* keep compiler quiet */
ptr = GETARR(bgm);
for (i = 0; i < *nentries; i++)
{
- text *item;
+ text *item;
if (ptr->pmatch)
{
/* text *query = PG_GETARG_TEXT_P(2); */
int32 nkeys = PG_GETARG_INT32(3);
- Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
+ Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
bool *recheck = (bool *) PG_GETARG_POINTER(5);
bool res;
int32 i;
-
- Assert(nkeys > 0);
-
- /*
- * Don't recheck the heap tuple against the query if either
- * pg_bigm.enable_recheck is disabled or the search word is
- * the special one so that the index can return the exact
- * result.
- */
- *recheck = bigm_enable_recheck &&
- ((nkeys > 1) || *((bool *) extra_data));
+ int32 ntrue;
switch (strategy)
{
case LikeStrategyNumber:
+
+ /*
+ * Don't recheck the heap tuple against the query if either
+ * pg_bigm.enable_recheck is disabled or the search word is the
+ * special one so that the index can return the exact result.
+ */
+ Assert(extra_data != NULL);
+ *recheck = bigm_enable_recheck &&
+ (*((bool *) extra_data) || (nkeys != 1));
+
/* Check if all extracted bigrams are presented. */
res = true;
for (i = 0; i < nkeys; i++)
}
}
break;
+ case SimilarityStrategyNumber:
+ /* Count the matches */
+ *recheck = bigm_enable_recheck;
+ ntrue = 0;
+ for (i = 0; i < nkeys; i++)
+ {
+ if (check[i])
+ ntrue++;
+ }
+
+ /*--------------------
+ * If DIVUNION is defined then similarity formula is:
+ * c / (len1 + len2 - c)
+ * where c is number of common bigrams and it stands as ntrue in
+ * this code. Here we don't know value of len2 but we can assume
+ * that c (ntrue) is a lower bound of len2, so upper bound of
+ * similarity is:
+ * c / (len1 + c - c) => c / len1
+ * If DIVUNION is not defined then similarity formula is:
+ * c / max(len1, len2)
+ * And again, c (ntrue) is a lower bound of len2, but c <= len1
+ * just by definition and, consequently, upper bound of
+ * similarity is just c / len1.
+ * So, independently of DIVUNION, the upper bound formula is the same.
+ */
+ res = (nkeys == 0) ? false :
+ ((((float4) ntrue) / ((float4) nkeys)) >=
+ (float4) bigm_similarity_limit);
+ break;
default:
elog(ERROR, "unrecognized strategy number: %d", strategy);
res = false; /* keep compiler quiet */
PG_RETURN_BOOL(res);
}
+/*
+ * gin_bigm_triconsistent
+ *
+ * Ternary (GIN_TRUE / GIN_FALSE / GIN_MAYBE) counterpart of
+ * gin_bigm_consistent().  Reads the per-key ternary array (arg 0), the
+ * strategy number (arg 1), the number of keys (arg 3) and extra_data
+ * (arg 4); the query argument (arg 2) is not used.
+ *
+ * Returns GIN_FALSE when the keys rule out a match, GIN_MAYBE when a
+ * possible match still has to be rechecked, and GIN_TRUE when a match is
+ * reported without recheck.  An unrecognized strategy number raises an
+ * ERROR.
+ *
+ * triConsistent function is available only in 9.4 or later
+ */
+#if PG_VERSION_NUM >= 90400
+Datum
+gin_bigm_triconsistent(PG_FUNCTION_ARGS)
+{
+ GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0);
+ StrategyNumber strategy = PG_GETARG_UINT16(1);
+
+ /* text *query = PG_GETARG_TEXT_P(2); */
+ int32 nkeys = PG_GETARG_INT32(3);
+ Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
+ GinTernaryValue res = GIN_MAYBE;
+ int32 i,
+ ntrue;
+
+ switch (strategy)
+ {
+ case LikeStrategyNumber:
+ /*
+ * Don't recheck the heap tuple against the query if either
+ * pg_bigm.enable_recheck is disabled or the search word is the
+ * special one so that the index can return the exact result.
+ *
+ * Concretely: start from GIN_MAYBE only when bigm_enable_recheck
+ * is set and either the bool that extra_data points to is true or
+ * nkeys is not exactly 1; otherwise start from GIN_TRUE.
+ *
+ * NOTE(review): extra_data is dereferenced here without the
+ * Assert(extra_data != NULL) that the LIKE branch of
+ * gin_bigm_consistent() has -- confirm it can never be NULL.
+ */
+ res = (bigm_enable_recheck &&
+ (*((bool *) extra_data) || (nkeys != 1))) ?
+ GIN_MAYBE : GIN_TRUE;
+
+ /*
+ * Check if all extracted bigrams are present.  Any key that is
+ * definitely absent (GIN_FALSE) makes the result GIN_FALSE.
+ *
+ * NOTE(review): a GIN_MAYBE key leaves res untouched, so GIN_TRUE
+ * can be returned while some keys are only GIN_MAYBE -- confirm
+ * this is intended.
+ */
+ for (i = 0; i < nkeys; i++)
+ {
+ if (check[i] == GIN_FALSE)
+ {
+ res = GIN_FALSE;
+ break;
+ }
+ }
+ break;
+ case SimilarityStrategyNumber:
+ /*
+ * Count the matches.  Every key that is not GIN_FALSE is counted,
+ * so GIN_MAYBE keys are counted as matches.
+ */
+ ntrue = 0;
+ for (i = 0; i < nkeys; i++)
+ {
+ if (check[i] != GIN_FALSE)
+ ntrue++;
+ }
+
+ /*
+ * See comment in gin_bigm_consistent() about upper bound formula.
+ *
+ * No keys means GIN_FALSE; otherwise the ratio of ntrue to nkeys
+ * is compared with bigm_similarity_limit, giving GIN_MAYBE when
+ * the limit is reached and GIN_FALSE when it is not.  A result
+ * that is not GIN_FALSE is then promoted to GIN_TRUE when
+ * bigm_enable_recheck is off.
+ */
+ res = (nkeys == 0) ? GIN_FALSE :
+ (((((float4) ntrue) / ((float4) nkeys)) >=
+ (float4) bigm_similarity_limit) ? GIN_MAYBE : GIN_FALSE);
+
+ if (res != GIN_FALSE && !bigm_enable_recheck)
+ res = GIN_TRUE;
+ break;
+ default:
+ elog(ERROR, "unrecognized strategy number: %d", strategy);
+ res = GIN_FALSE; /* keep compiler quiet */
+ break;
+ }
+
+ PG_RETURN_GIN_TERNARY_VALUE(res);
+}
+#endif /* PG_VERSION_NUM >= 90400 */
+
Datum
gin_bigm_compare_partial(PG_FUNCTION_ARGS)
{
- text *arg1 = PG_GETARG_TEXT_PP(0);
- text *arg2 = PG_GETARG_TEXT_PP(1);
- char *a1p;
- char *a2p;
- int mblen1;
- int mblen2;
- int res;
+ text *arg1 = PG_GETARG_TEXT_PP(0);
+ text *arg2 = PG_GETARG_TEXT_PP(1);
+ char *a1p;
+ char *a2p;
+ int mblen1;
+ int mblen2;
+ int res;
a1p = VARDATA_ANY(arg1);
a2p = VARDATA_ANY(arg2);
HeapTuple tuple;
TupleDesc tupdesc;
+ indexRel = relation_open(indexOid, AccessShareLock);
+
+ if (indexRel->rd_rel->relkind != RELKIND_INDEX ||
+ indexRel->rd_rel->relam != GIN_AM_OID)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("relation \"%s\" is not a GIN index",
+ RelationGetRelationName(indexRel))));
+
+ /*
+ * Reject attempts to read non-local temporary relations; we would be
+ * likely to get wrong data since we have no visibility into the owning
+ * session's local buffers.
+ */
+ if (RELATION_IS_OTHER_TEMP(indexRel))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot access temporary indexes of other sessions")));
+
/*
* Obtain statistic information from the meta page
*/
- indexRel = index_open(indexOid, AccessShareLock);
metabuffer = ReadBuffer(indexRel, GIN_METAPAGE_BLKNO);
LockBuffer(metabuffer, GIN_SHARE);
metapage = BufferGetPage(metabuffer);
metadata = GinPageGetMeta(metapage);
- index_close(indexRel, AccessShareLock);
/*
- * Construct a tuple descriptor for the result row. This must
- * match this function's pg_bigm--x.x.sql entry.
+ * Construct a tuple descriptor for the result row. This must match this
+ * function's pg_bigm--x.x.sql entry.
*/
+ #if PG_VERSION_NUM >= 120000
+ tupdesc = CreateTemplateTupleDesc(2);
+#else
tupdesc = CreateTemplateTupleDesc(2, false);
+#endif
TupleDescInitEntry(tupdesc, (AttrNumber) 1,
"pages", INT4OID, -1, 0);
TupleDescInitEntry(tupdesc, (AttrNumber) 2,
isnull[1] = false;
UnlockReleaseBuffer(metabuffer);
+ relation_close(indexRel, AccessShareLock);
tuple = heap_form_tuple(tupdesc, values, isnull);
PG_RETURN_DATUM(HeapTupleGetDatum(tuple));