1 /*-------------------------------------------------------------------------
3 * Portions Copyright (c) 2007-2012, PostgreSQL Global Development Group
4 * Portions Copyright (c) 2013-2015, NTT DATA Corporation
8 * Support full text search using bigrams.
9 * Author: NTT DATA Corporation
11 *-------------------------------------------------------------------------
17 #include "access/gin.h"
18 #include "access/gin_private.h"
19 #include "access/itup.h"
20 #include "access/skey.h"
21 #include "access/tuptoaster.h"
22 #include "catalog/pg_type.h"
24 #include "mb/pg_wchar.h"
25 #include "storage/bufmgr.h"
26 #include "storage/bufpage.h"
27 #include "tsearch/ts_locale.h"
28 #include "utils/array.h"
29 #include "utils/builtins.h"
/*
 * One PG_FUNCTION_INFO_V1 invocation for each of the five functions in this
 * file that take PG_FUNCTION_ARGS.
 */
32 PG_FUNCTION_INFO_V1(gin_extract_value_bigm);
33 PG_FUNCTION_INFO_V1(gin_extract_query_bigm);
34 PG_FUNCTION_INFO_V1(gin_bigm_consistent);
35 PG_FUNCTION_INFO_V1(gin_bigm_compare_partial);
36 PG_FUNCTION_INFO_V1(pg_gin_pending_stats);
39 * The function prototypes are generated by the PG_FUNCTION_INFO_V1
40 * macro since PostgreSQL 9.4, so declaring the prototypes explicitly
41 * here is necessary only for 9.3 or earlier.
/*
 * Explicit prototypes for the five PG_FUNCTION_ARGS functions of this file,
 * compiled only when PG_VERSION_NUM is below 90400.
 */
43 #if PG_VERSION_NUM < 90400
44 Datum gin_extract_value_bigm(PG_FUNCTION_ARGS);
45 Datum gin_extract_query_bigm(PG_FUNCTION_ARGS);
46 Datum gin_bigm_consistent(PG_FUNCTION_ARGS);
47 Datum gin_bigm_compare_partial(PG_FUNCTION_ARGS);
48 Datum pg_gin_pending_stats(PG_FUNCTION_ARGS);
/*
 * gin_extract_value_bigm
 *
 * Splits the text in argument 0 into bigrams with generate_bigm() and
 * returns them as an array of text Datums, one element per bigram.
 *
 * Argument 1 is fetched as an int32 pointer (nentries).
 * NOTE(review): the statement that stores the key count through nentries
 * is not among the lines shown here -- presumably it receives bgmlen;
 * confirm.
 *
 * The result array is allocated with palloc(); entries stays NULL unless
 * that allocation is reached.
 */
52 gin_extract_value_bigm(PG_FUNCTION_ARGS)
54 text *val = (text *) PG_GETARG_TEXT_P(0);
55 int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
56 Datum *entries = NULL;
/* Only the payload is scanned: total varlena size minus its header. */
62 bgm = generate_bigm(VARDATA(val), VARSIZE(val) - VARHDRSZ);
63 bgmlen = ARRNELEM(bgm);
/* One Datum slot per extracted bigram. */
71 entries = (Datum *) palloc(sizeof(Datum) * bgmlen);
74 for (i = 0; i < bgmlen; i++)
/* Wrap the bigram's bytes (ptr->str, ptr->bytelen) in a new text value. */
76 text *item = cstring_to_text_with_len(ptr->str, ptr->bytelen);
77 entries[i] = PointerGetDatum(item);
82 PG_RETURN_POINTER(entries);
/*
 * gin_extract_query_bigm
 *
 * Extracts the bigram keys to search for from a query string and returns
 * them as an array of text Datums.
 *
 * Arguments, as fetched below:
 *   0  val        - query text
 *   1  nentries   - out: number of keys; bgmlen, capped by
 *                   bigm_gin_key_limit when that setting is non-zero
 *   2  strategy   - LikeStrategyNumber or SimilarityStrategyNumber; the
 *                   "unrecognized strategy number" ERROR covers the rest
 *   3  pmatch     - out: may be pointed at a palloc0'd bool array with
 *                   *nentries slots (the condition is not visible here)
 *   4  extra_data - out: for the LIKE strategy, pointed at a palloc'd
 *                   bool that serves as the recheck flag
 *   5  nullFlags  - not used (the fetch is commented out)
 *   6  searchMode - out: set to GIN_SEARCH_MODE_ALL when no bigram was
 *                   extracted, per the comment above that assignment
 *
 * Returns entries, which stays NULL unless the palloc below is reached.
 */
86 gin_extract_query_bigm(PG_FUNCTION_ARGS)
88 text *val = (text *) PG_GETARG_TEXT_P(0);
89 int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
90 StrategyNumber strategy = PG_GETARG_UINT16(2);
92 bool **pmatch = (bool **) PG_GETARG_POINTER(3);
93 Pointer **extra_data = (Pointer **) PG_GETARG_POINTER(4);
94 /* bool **nullFlags = (bool **) PG_GETARG_POINTER(5); */
95 int32 *searchMode = (int32 *) PG_GETARG_POINTER(6);
96 Datum *entries = NULL;
105 case LikeStrategyNumber:
/* Payload pointer and payload length (varlena size minus header). */
107 char *str = VARDATA(val);
108 int slen = VARSIZE(val) - VARHDRSZ;
112 * For wildcard search we extract all the bigrams that every
113 * potentially-matching string must include.
115 bgm = generate_wildcard_bigm(str, slen, &removeDups);
116 bgmlen = ARRNELEM(bgm);
119 * Check whether the heap tuple fetched by index search needs to be
120 * rechecked against the query. If the search word consists of one
121 * or two characters and doesn't contain any space character, we can
122 * guarantee that the index test would be exact. That is, the heap
123 * tuple does match the query, so it doesn't need to be rechecked.
/*
 * The recheck flag is a single bool allocated here; it is handed back to
 * the caller through extra_data and written via the recheck alias.
 */
125 *extra_data = (Pointer *) palloc(sizeof(bool));
126 recheck = (bool *) *extra_data;
127 if (bgmlen == 1 && !removeDups)
132 for (sp = str; (sp - str) < slen;)
/*
 * Advance to the next character: a byte with the high bit set moves
 * forward by pg_mblen(sp), any other byte by one.
 */
140 sp += IS_HIGHBIT_SET(*sp) ? pg_mblen(sp) : 1;
147 case SimilarityStrategyNumber:
149 bgm = generate_bigm(VARDATA(val), VARSIZE(val) - VARHDRSZ);
150 bgmlen = ARRNELEM(bgm);
154 elog(ERROR, "unrecognized strategy number: %d", strategy);
155 bgm = NULL; /* keep compiler quiet */
/*
 * A bigm_gin_key_limit of zero means no cap; otherwise no more than that
 * many of the extracted bigrams are used as keys.
 */
159 *nentries = (bigm_gin_key_limit == 0) ?
160 bgmlen : Min(bigm_gin_key_limit, bgmlen);
165 entries = (Datum *) palloc(sizeof(Datum) * *nentries);
167 for (i = 0; i < *nentries; i++)
/* NOTE(review): the condition guarding this allocation is not shown. */
174 *pmatch = (bool *) palloc0(sizeof(bool) * *nentries);
177 item = cstring_to_text_with_len(ptr->str, ptr->bytelen);
178 entries[i] = PointerGetDatum(item);
184 * If no bigram was extracted then we have to scan all the index.
187 *searchMode = GIN_SEARCH_MODE_ALL;
189 PG_RETURN_POINTER(entries);
/*
 * gin_bigm_consistent
 *
 * Computes a boolean result (res) for one indexed item from the number of
 * query keys and, per the strategy, sets *recheck.
 *
 * Arguments, as fetched below:
 *   0  check      - bool pointer
 *   1  strategy   - LikeStrategyNumber or SimilarityStrategyNumber; the
 *                   "unrecognized strategy number" ERROR covers the rest
 *   2  query      - not used (the fetch is commented out)
 *   3  nkeys      - number of keys
 *   4  extra_data - for the LIKE strategy, read as a pointer to a bool
 *   5  recheck    - out: whether the caller should recheck the heap tuple
 *
 * NOTE(review): the loop bodies and the statement returning res are not
 * among the lines shown here.
 */
193 gin_bigm_consistent(PG_FUNCTION_ARGS)
195 bool *check = (bool *) PG_GETARG_POINTER(0);
196 StrategyNumber strategy = PG_GETARG_UINT16(1);
198 /* text *query = PG_GETARG_TEXT_P(2); */
199 int32 nkeys = PG_GETARG_INT32(3);
201 Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
202 bool *recheck = (bool *) PG_GETARG_POINTER(5);
209 case LikeStrategyNumber:
211 * Don't recheck the heap tuple against the query if either
212 * pg_bigm.enable_recheck is disabled or the search word is
213 * the special one so that the index can return the exact
216 Assert(extra_data != NULL);
/*
 * Recheck is requested only when bigm_enable_recheck is on and either the
 * bool stored behind extra_data is true or there is not exactly one key.
 */
217 *recheck = bigm_enable_recheck &&
218 (*((bool *) extra_data) || (nkeys != 1));
220 /* Check if all extracted bigrams are present. */
222 for (i = 0; i < nkeys; i++)
231 case SimilarityStrategyNumber:
232 /* Count the matches */
233 *recheck = bigm_enable_recheck;
235 for (i = 0; i < nkeys; i++)
/*
 * NOTE(review): two assignments to res follow back to back; presumably
 * they are alternatives selected by a preprocessor conditional that is
 * not visible here -- confirm.
 *
 * First form: true when every key matched, otherwise compares
 * ntrue / (nkeys - ntrue) with bigm_similarity_limit.  The nkeys == ntrue
 * test comes first, so the divisor is non-zero when the division runs.
 */
241 res = (nkeys == ntrue) ? true :
242 ((((((float4) ntrue) / ((float4) (nkeys - ntrue)))) >=
243 (float4) bigm_similarity_limit) ? true : false);
/*
 * Second form: false when there are no keys, otherwise compares
 * ntrue / nkeys with bigm_similarity_limit.
 */
245 res = (nkeys == 0) ? false :
246 ((((((float4) ntrue) / ((float4) nkeys))) >=
247 (float4) bigm_similarity_limit) ? true : false);
251 elog(ERROR, "unrecognized strategy number: %d", strategy);
252 res = false; /* keep compiler quiet */
/*
 * gin_bigm_compare_partial
 *
 * Compares the first character of two text arguments byte-wise.
 *
 * pg_mblen() supplies the byte length of the character at the start of
 * each argument.  The int32 result computed below is 0 when the first
 * mblen1 bytes of both arguments are identical and 1 otherwise.
 *
 * NOTE(review): the statement executed when the two lengths differ is not
 * among the lines shown here.
 */
260 gin_bigm_compare_partial(PG_FUNCTION_ARGS)
262 text *arg1 = PG_GETARG_TEXT_PP(0);
263 text *arg2 = PG_GETARG_TEXT_PP(1);
270 a1p = VARDATA_ANY(arg1);
271 a2p = VARDATA_ANY(arg2);
/* Byte length of the leading character of each argument. */
273 mblen1 = pg_mblen(a1p);
274 mblen2 = pg_mblen(a2p);
276 if (mblen1 != mblen2)
/* memcmp() returns non-zero on any difference; collapse that to 1. */
279 res = memcmp(a1p, a2p, mblen1) ? 1 : 0;
280 PG_RETURN_INT32(res);
284 * Report both the number of pages and the number of heap tuples that
285 * are in the pending list.
/*
 * pg_gin_pending_stats
 *
 * Takes an index OID (argument 0) and returns a two-column row built from
 * the index's GIN meta page:
 *   "pages"  (INT4OID) - metadata->nPendingPages
 *   "tuples" (INT8OID) - metadata->nPendingHeapTuples
 *
 * NOTE(review): no check that the relation really is a GIN index is
 * visible among the lines shown here -- confirm that one exists or is
 * not needed before GinPageGetMeta() is applied to block
 * GIN_METAPAGE_BLKNO.
 */
288 pg_gin_pending_stats(PG_FUNCTION_ARGS)
290 Oid indexOid = PG_GETARG_OID(0);
294 GinMetaPageData *metadata;
301 * Obtain statistic information from the meta page
303 indexRel = index_open(indexOid, AccessShareLock);
304 metabuffer = ReadBuffer(indexRel, GIN_METAPAGE_BLKNO);
/* Share-lock the meta page; it is released by UnlockReleaseBuffer below. */
305 LockBuffer(metabuffer, GIN_SHARE);
306 metapage = BufferGetPage(metabuffer);
307 metadata = GinPageGetMeta(metapage);
/*
 * NOTE(review): the relation is closed here, before metadata is read; the
 * reads below rely on metabuffer still being held at that point.
 */
308 index_close(indexRel, AccessShareLock);
311 * Construct a tuple descriptor for the result row. This must
312 * match this function's pg_bigm--x.x.sql entry.
314 tupdesc = CreateTemplateTupleDesc(2, false);
315 TupleDescInitEntry(tupdesc, (AttrNumber) 1,
316 "pages", INT4OID, -1, 0);
317 TupleDescInitEntry(tupdesc, (AttrNumber) 2,
318 "tuples", INT8OID, -1, 0);
319 tupdesc = BlessTupleDesc(tupdesc);
/* Copy both counters out of the meta page while the buffer is still held. */
322 values[0] = Int32GetDatum(metadata->nPendingPages);
326 values[1] = Int64GetDatum(metadata->nPendingHeapTuples);
329 UnlockReleaseBuffer(metabuffer);
331 tuple = heap_form_tuple(tupdesc, values, isnull);
332 PG_RETURN_DATUM(HeapTupleGetDatum(tuple));