1 /*-------------------------------------------------------------------------
3 * Portions Copyright (c) 2007-2012, PostgreSQL Global Development Group
4 * Portions Copyright (c) 2013-2015, NTT DATA Corporation
8 * Support full text search using bigrams.
9 * Author: NTT DATA Corporation
11 *-------------------------------------------------------------------------
17 #include "access/gin.h"
18 #include "access/gin_private.h"
19 #include "access/itup.h"
20 #include "access/skey.h"
21 #include "access/tuptoaster.h"
22 #include "catalog/pg_type.h"
24 #include "mb/pg_wchar.h"
25 #include "storage/bufmgr.h"
26 #include "storage/bufpage.h"
27 #include "tsearch/ts_locale.h"
28 #include "utils/array.h"
29 #include "utils/builtins.h"
/*
 * PG_FUNCTION_INFO_V1 entries for the PG_FUNCTION_ARGS-style functions
 * defined in this file, followed by explicit prototypes that are compiled
 * only when PG_VERSION_NUM < 90400.
 *
 * NOTE(review): no prototype for gin_bigm_triconsistent appears in the
 * pre-9.4 list; that matches its PG_FUNCTION_INFO_V1 entry being guarded by
 * PG_VERSION_NUM >= 90400.
 */
32 PG_FUNCTION_INFO_V1(gin_extract_value_bigm);
33 PG_FUNCTION_INFO_V1(gin_extract_query_bigm);
34 PG_FUNCTION_INFO_V1(gin_bigm_consistent);
35 PG_FUNCTION_INFO_V1(gin_bigm_compare_partial);
36 PG_FUNCTION_INFO_V1(pg_gin_pending_stats);
38 /* triConsistent function is available only in 9.4 or later */
39 #if PG_VERSION_NUM >= 90400
40 PG_FUNCTION_INFO_V1(gin_bigm_triconsistent);
44 * The function prototypes are created as a part of PG_FUNCTION_INFO_V1
45 * macro since 9.4, and hence the declaration of the function prototypes
46 * here is necessary only for 9.3 or before.
48 #if PG_VERSION_NUM < 90400
49 Datum gin_extract_value_bigm(PG_FUNCTION_ARGS);
50 Datum gin_extract_query_bigm(PG_FUNCTION_ARGS);
51 Datum gin_bigm_consistent(PG_FUNCTION_ARGS);
52 Datum gin_bigm_compare_partial(PG_FUNCTION_ARGS);
53 Datum pg_gin_pending_stats(PG_FUNCTION_ARGS);
/*
 * gin_extract_value_bigm
 *
 * Produces the index keys for one text value.  The text in argument 0 is
 * handed to generate_bigm() (data pointer plus VARSIZE minus VARHDRSZ as the
 * byte length), and every bigram found is copied into its own text datum
 * with cstring_to_text_with_len(ptr->str, ptr->bytelen).
 *
 * Arguments:
 *   0 - text value to extract bigrams from
 *   1 - int32 pointer fetched as "nentries"
 *
 * Returns the palloc'd Datum array; "entries" starts out as NULL.
 *
 * NOTE(review): the store into *nentries and whatever guards the allocation
 * when bgmlen is zero are not visible here -- confirm against the full
 * function.
 */
57 gin_extract_value_bigm(PG_FUNCTION_ARGS)
59 text *val = (text *) PG_GETARG_TEXT_P(0);
60 int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
61 Datum *entries = NULL;
/* Split the value into bigrams and remember how many were produced. */
67 bgm = generate_bigm(VARDATA(val), VARSIZE(val) - VARHDRSZ);
68 bgmlen = ARRNELEM(bgm);
/* One Datum slot per bigram. */
76 entries = (Datum *) palloc(sizeof(Datum) * bgmlen);
79 for (i = 0; i < bgmlen; i++)
/* Each key is a text datum holding the bigram's bytes. */
81 text *item = cstring_to_text_with_len(ptr->str, ptr->bytelen);
83 entries[i] = PointerGetDatum(item);
88 PG_RETURN_POINTER(entries);
/*
 * gin_extract_query_bigm
 *
 * Turns the query text (argument 0) into the array of bigram keys to look
 * up, depending on the strategy number in argument 2:
 *
 *   LikeStrategyNumber       - bigrams come from generate_wildcard_bigm();
 *                              a palloc'd bool is published through
 *                              *extra_data and aliased locally as "recheck".
 *   SimilarityStrategyNumber - bigrams come from generate_bigm().
 *   any other value          - elog(ERROR, "unrecognized strategy number").
 *
 * Arguments:
 *   0 - query text
 *   1 - int32 pointer receiving the number of keys (*nentries)
 *   2 - strategy number
 *   3 - bool ** "pmatch"; a zero-filled bool array of *nentries elements
 *       can be stored through it
 *   4 - Pointer ** "extra_data" (see LikeStrategyNumber above)
 *   5 - unused (kept as a commented-out declaration)
 *   6 - int32 pointer "searchMode"
 *
 * *nentries is bgmlen when bigm_gin_key_limit is 0, otherwise
 * Min(bigm_gin_key_limit, bgmlen).  Returns the Datum array of text keys;
 * "entries" starts out as NULL.
 *
 * NOTE(review): the conditions guarding the recheck flag assignment, the
 * *pmatch allocation and the GIN_SEARCH_MODE_ALL assignment are not visible
 * here -- confirm them against the full function before relying on the
 * summary above.
 */
92 gin_extract_query_bigm(PG_FUNCTION_ARGS)
94 text *val = (text *) PG_GETARG_TEXT_P(0);
95 int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
96 StrategyNumber strategy = PG_GETARG_UINT16(2);
98 bool **pmatch = (bool **) PG_GETARG_POINTER(3);
99 Pointer **extra_data = (Pointer **) PG_GETARG_POINTER(4);
101 /* bool **nullFlags = (bool **) PG_GETARG_POINTER(5); */
102 int32 *searchMode = (int32 *) PG_GETARG_POINTER(6);
103 Datum *entries = NULL;
112 case LikeStrategyNumber:
114 char *str = VARDATA(val);
115 int slen = VARSIZE(val) - VARHDRSZ;
119 * For wildcard search we extract all the bigrams that every
120 * potentially-matching string must include.
122 bgm = generate_wildcard_bigm(str, slen, &removeDups);
123 bgmlen = ARRNELEM(bgm);
126 * Check whether the heap tuple fetched by index search needs to
127 * be rechecked against the query. If the search word consists of
128 * one or two characters and doesn't contain any space character,
129 * we can guarantee that the index test would be exact. That is,
130 * the heap tuple does match the query, so it doesn't need to be
133 *extra_data = (Pointer *) palloc(sizeof(bool));
134 recheck = (bool *) *extra_data;
135 if (bgmlen == 1 && !removeDups)
140 for (sp = str; (sp - str) < slen;)
148 sp += IS_HIGHBIT_SET(*sp) ? pg_mblen(sp) : 1;
155 case SimilarityStrategyNumber:
157 bgm = generate_bigm(VARDATA(val), VARSIZE(val) - VARHDRSZ);
158 bgmlen = ARRNELEM(bgm);
162 elog(ERROR, "unrecognized strategy number: %d", strategy);
163 bgm = NULL; /* keep compiler quiet */
/* A bigm_gin_key_limit of 0 means "no cap" on the number of keys. */
167 *nentries = (bigm_gin_key_limit == 0) ?
168 bgmlen : Min(bigm_gin_key_limit, bgmlen);
173 entries = (Datum *) palloc(sizeof(Datum) * *nentries);
175 for (i = 0; i < *nentries; i++)
182 *pmatch = (bool *) palloc0(sizeof(bool) * *nentries);
185 item = cstring_to_text_with_len(ptr->str, ptr->bytelen);
186 entries[i] = PointerGetDatum(item);
192 * If no bigram was extracted then we have to scan all the index.
195 *searchMode = GIN_SEARCH_MODE_ALL;
197 PG_RETURN_POINTER(entries);
/*
 * gin_bigm_consistent
 *
 * Boolean consistency check over the per-key match flags in check[0..nkeys).
 *
 * Arguments:
 *   0 - bool array "check", one flag per query key
 *   1 - strategy number
 *   2 - unused (kept as a commented-out declaration)
 *   3 - number of keys (nkeys)
 *   4 - extra_data; for LikeStrategyNumber it is read as a bool, which is
 *       the flag gin_extract_query_bigm allocates for that strategy
 *   5 - bool pointer receiving the recheck decision (*recheck)
 *
 * LikeStrategyNumber:
 *   *recheck = bigm_enable_recheck && (flag in extra_data || nkeys != 1),
 *   then the keys are scanned in order.
 * SimilarityStrategyNumber:
 *   *recheck = bigm_enable_recheck; the keys are scanned and a ratio built
 *   from ntrue is compared (>=) with bigm_similarity_limit.  Two formulas are
 *   present: ntrue / (nkeys - ntrue), short-circuited to true when
 *   nkeys == ntrue, and ntrue / nkeys, short-circuited to false when
 *   nkeys == 0.
 * Any other strategy raises elog(ERROR).
 *
 * NOTE(review): the loop bodies and whatever selects between the two
 * similarity formulas (presumably a preprocessor conditional) are not
 * visible here -- confirm against the full function.
 */
201 gin_bigm_consistent(PG_FUNCTION_ARGS)
203 bool *check = (bool *) PG_GETARG_POINTER(0);
204 StrategyNumber strategy = PG_GETARG_UINT16(1);
206 /* text *query = PG_GETARG_TEXT_P(2); */
207 int32 nkeys = PG_GETARG_INT32(3);
209 Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
210 bool *recheck = (bool *) PG_GETARG_POINTER(5);
217 case LikeStrategyNumber:
220 * Don't recheck the heap tuple against the query if either
221 * pg_bigm.enable_recheck is disabled or the search word is the
222 * special one so that the index can return the exact result.
224 Assert(extra_data != NULL);
225 *recheck = bigm_enable_recheck &&
226 (*((bool *) extra_data) || (nkeys != 1));
228 /* Check that every extracted bigram is present. */
230 for (i = 0; i < nkeys; i++)
239 case SimilarityStrategyNumber:
240 /* Count the matches */
241 *recheck = bigm_enable_recheck;
243 for (i = 0; i < nkeys; i++)
/* Variant 1: matched keys relative to unmatched keys. */
249 res = (nkeys == ntrue) ? true :
250 ((((((float4) ntrue) / ((float4) (nkeys - ntrue)))) >=
251 (float4) bigm_similarity_limit) ? true : false);
/* Variant 2: matched keys relative to all keys; no keys means no match. */
253 res = (nkeys == 0) ? false :
254 ((((((float4) ntrue) / ((float4) nkeys))) >=
255 (float4) bigm_similarity_limit) ? true : false);
259 elog(ERROR, "unrecognized strategy number: %d", strategy);
260 res = false; /* keep compiler quiet */
267 /* triConsistent function is available only in 9.4 or later */
268 #if PG_VERSION_NUM >= 90400
/*
 * gin_bigm_triconsistent
 *
 * Ternary counterpart of gin_bigm_consistent: check[] holds GinTernaryValue
 * entries and the result is returned with PG_RETURN_GIN_TERNARY_VALUE().
 *
 * Arguments:
 *   0 - GinTernaryValue array "check", one entry per query key
 *   1 - strategy number
 *   2 - unused (kept as a commented-out declaration)
 *   3 - number of keys (nkeys)
 *   4 - extra_data; read as a bool for LikeStrategyNumber
 *
 * "res" starts as GIN_MAYBE.
 *
 * LikeStrategyNumber:
 *   res is GIN_MAYBE when bigm_enable_recheck && (flag in extra_data ||
 *   nkeys != 1), otherwise GIN_TRUE; the keys are then scanned for
 *   GIN_FALSE entries.
 * SimilarityStrategyNumber:
 *   the scan tests check[i] != GIN_FALSE, so GIN_MAYBE entries are treated
 *   like GIN_TRUE ones by that test.  The same two ratio formulas as in
 *   gin_bigm_consistent are compared (>=) with bigm_similarity_limit, giving
 *   GIN_MAYBE on success and GIN_FALSE otherwise.  A follow-up test fires
 *   when res != GIN_FALSE and bigm_enable_recheck is off.
 * Any other strategy raises elog(ERROR).
 *
 * NOTE(review): the loop bodies, the statement under the final "if", and
 * whatever selects between the two similarity formulas are not visible here
 * -- confirm against the full function.
 * NOTE(review): unlike gin_bigm_consistent, no Assert(extra_data != NULL) is
 * visible before extra_data is dereferenced in the LIKE branch -- verify
 * whether that is intentional.
 */
270 gin_bigm_triconsistent(PG_FUNCTION_ARGS)
272 GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0);
273 StrategyNumber strategy = PG_GETARG_UINT16(1);
275 /* text *query = PG_GETARG_TEXT_P(2); */
276 int32 nkeys = PG_GETARG_INT32(3);
277 Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
278 GinTernaryValue res = GIN_MAYBE;
284 case LikeStrategyNumber:
286 * Don't recheck the heap tuple against the query if either
287 * pg_bigm.enable_recheck is disabled or the search word is the
288 * special one so that the index can return the exact result.
290 res = (bigm_enable_recheck &&
291 (*((bool *) extra_data) || (nkeys != 1))) ?
292 GIN_MAYBE : GIN_TRUE;
294 /* Check that every extracted bigram is present. */
295 for (i = 0; i < nkeys; i++)
297 if (check[i] == GIN_FALSE)
304 case SimilarityStrategyNumber:
305 /* Count the matches */
307 for (i = 0; i < nkeys; i++)
309 if (check[i] != GIN_FALSE)
/* Variant 1: matched keys relative to unmatched keys. */
313 res = (nkeys == ntrue) ? GIN_MAYBE :
314 (((((float4) ntrue) / ((float4) (nkeys - ntrue))) >=
315 (float4) bigm_similarity_limit) ? GIN_MAYBE : GIN_FALSE);
/* Variant 2: matched keys relative to all keys; no keys means GIN_FALSE. */
317 res = (nkeys == 0) ? GIN_FALSE :
318 (((((float4) ntrue) / ((float4) nkeys)) >=
319 (float4) bigm_similarity_limit) ? GIN_MAYBE : GIN_FALSE);
321 if (res != GIN_FALSE && !bigm_enable_recheck)
325 elog(ERROR, "unrecognized strategy number: %d", strategy);
326 res = GIN_FALSE; /* keep compiler quiet */
330 PG_RETURN_GIN_TERNARY_VALUE(res);
332 #endif /* PG_VERSION_NUM >= 90400 */
/*
 * gin_bigm_compare_partial
 *
 * Compares only the leading multibyte character of the two text arguments.
 * pg_mblen() gives the byte length of the first character of each; when the
 * lengths are equal, the first mblen1 bytes are compared with memcmp() and
 * the function returns 0 if they are identical and 1 otherwise (the sign of
 * memcmp() is deliberately collapsed, so this path never returns a negative
 * value).
 *
 * Arguments:
 *   0 - text (fetched with PG_GETARG_TEXT_PP, read through VARDATA_ANY)
 *   1 - text (same access pattern)
 *
 * NOTE(review): the statement executed when mblen1 != mblen2 is not visible
 * here -- confirm the value returned for that case.
 * NOTE(review): pg_mblen() is called without a visible check that either
 * argument is non-empty -- verify that callers never pass empty text.
 */
335 gin_bigm_compare_partial(PG_FUNCTION_ARGS)
337 text *arg1 = PG_GETARG_TEXT_PP(0);
338 text *arg2 = PG_GETARG_TEXT_PP(1);
345 a1p = VARDATA_ANY(arg1);
346 a2p = VARDATA_ANY(arg2);
/* Byte length of the first character on each side. */
348 mblen1 = pg_mblen(a1p);
349 mblen2 = pg_mblen(a2p);
351 if (mblen1 != mblen2)
/* Same length: equal bytes give 0, any difference gives 1. */
354 res = memcmp(a1p, a2p, mblen1) ? 1 : 0;
355 PG_RETURN_INT32(res);
359 * Report both number of pages and number of heap tuples that
360 * are in the pending list.
/*
 * pg_gin_pending_stats
 *
 * Argument 0 is the OID of the index to inspect.  The index is opened with
 * AccessShareLock, block GIN_METAPAGE_BLKNO is read and locked with
 * GIN_SHARE, and the meta data is obtained via GinPageGetMeta().
 *
 * The result is a two-column composite built with heap_form_tuple():
 *   "pages"  (INT4OID) - metadata->nPendingPages
 *   "tuples" (INT8OID) - metadata->nPendingHeapTuples
 *
 * Ordering note: index_close() runs right after the meta page pointer is
 * obtained, while the buffer stays locked until UnlockReleaseBuffer(), which
 * is called only after both counters have been copied into values[].
 *
 * NOTE(review): no check that indexOid actually refers to a GIN index is
 * visible before its block 0 is interpreted as a GIN meta page -- confirm
 * whether such validation exists in the full function.
 * NOTE(review): the initialization of isnull[] is not visible here.
 */
363 pg_gin_pending_stats(PG_FUNCTION_ARGS)
365 Oid indexOid = PG_GETARG_OID(0);
369 GinMetaPageData *metadata;
376 * Obtain statistic information from the meta page
378 indexRel = index_open(indexOid, AccessShareLock);
379 metabuffer = ReadBuffer(indexRel, GIN_METAPAGE_BLKNO);
380 LockBuffer(metabuffer, GIN_SHARE);
381 metapage = BufferGetPage(metabuffer);
382 metadata = GinPageGetMeta(metapage);
383 index_close(indexRel, AccessShareLock);
386 * Construct a tuple descriptor for the result row. This must match this
387 * function's pg_bigm--x.x.sql entry.
389 tupdesc = CreateTemplateTupleDesc(2, false);
390 TupleDescInitEntry(tupdesc, (AttrNumber) 1,
391 "pages", INT4OID, -1, 0);
392 TupleDescInitEntry(tupdesc, (AttrNumber) 2,
393 "tuples", INT8OID, -1, 0);
394 tupdesc = BlessTupleDesc(tupdesc);
/* Copy the counters out while the meta page is still locked. */
397 values[0] = Int32GetDatum(metadata->nPendingPages);
401 values[1] = Int64GetDatum(metadata->nPendingHeapTuples);
404 UnlockReleaseBuffer(metabuffer);
406 tuple = heap_form_tuple(tupdesc, values, isnull);
407 PG_RETURN_DATUM(HeapTupleGetDatum(tuple));