1 /*-------------------------------------------------------------------------
3 * Portions Copyright (c) 2007-2012, PostgreSQL Global Development Group
4 * Portions Copyright (c) 2013-2015, NTT DATA Corporation
8 * Support full text search using bigrams.
9 * Author: NTT DATA Corporation
11 *-------------------------------------------------------------------------
17 #include "access/gin.h"
18 #include "access/gin_private.h"
19 #include "access/itup.h"
20 #include "access/skey.h"
21 #include "access/tuptoaster.h"
22 #include "catalog/pg_type.h"
24 #include "mb/pg_wchar.h"
25 #include "storage/bufmgr.h"
26 #include "storage/bufpage.h"
27 #include "tsearch/ts_locale.h"
28 #include "utils/array.h"
29 #include "utils/builtins.h"
/*
 * Version-1 calling-convention registration (PG_FUNCTION_INFO_V1) paired
 * with a prototype for each function of this file that is called through
 * the PostgreSQL function manager.
 */
/* Key extraction for an indexed value. */
32 PG_FUNCTION_INFO_V1(gin_extract_value_bigm);
33 Datum gin_extract_value_bigm(PG_FUNCTION_ARGS);
/* Key extraction for a query string. */
35 PG_FUNCTION_INFO_V1(gin_extract_query_bigm);
36 Datum gin_extract_query_bigm(PG_FUNCTION_ARGS);
/* Decides from the per-key check array whether an item satisfies the query. */
38 PG_FUNCTION_INFO_V1(gin_bigm_consistent);
39 Datum gin_bigm_consistent(PG_FUNCTION_ARGS);
/* Comparison of two text keys on their first character. */
41 PG_FUNCTION_INFO_V1(gin_bigm_compare_partial);
42 Datum gin_bigm_compare_partial(PG_FUNCTION_ARGS);
/* Reports pending-list counters read from an index metapage. */
44 PG_FUNCTION_INFO_V1(pg_gin_pending_stats);
45 Datum pg_gin_pending_stats(PG_FUNCTION_ARGS);
/*
 * gin_extract_value_bigm
 *
 * Builds the array of index keys for one text value: the value (argument 0)
 * is split into bigrams by generate_bigm(), and each bigram is copied into
 * its own text datum.
 *
 * Argument 1 is an int32 pointer named nentries.
 * NOTE(review): the statement that stores the key count through nentries,
 * and the code that sets and advances ptr, are not visible in this excerpt.
 *
 * Returns the entries array, which starts out NULL and is allocated with
 * room for bgmlen Datums.
 */
48 gin_extract_value_bigm(PG_FUNCTION_ARGS)
50 text *val = (text *) PG_GETARG_TEXT_P(0);
51 int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
52 Datum *entries = NULL;
/* Feed the payload bytes (total size minus the varlena header) to the bigram generator. */
58 bgm = generate_bigm(VARDATA(val), VARSIZE(val) - VARHDRSZ);
59 bgmlen = ARRNELEM(bgm);
/* One Datum slot per extracted bigram. */
67 entries = (Datum *) palloc(sizeof(Datum) * bgmlen);
70 for (i = 0; i < bgmlen; i++)
/* Copy exactly bytelen bytes of the bigram into a new text value. */
72 text *item = cstring_to_text_with_len(ptr->str, ptr->bytelen);
73 entries[i] = PointerGetDatum(item);
78 PG_RETURN_POINTER(entries);
/*
 * gin_extract_query_bigm
 *
 * Builds the array of search keys for a query string (argument 0) according
 * to the strategy number (argument 2):
 *   - LikeStrategyNumber: bigrams come from generate_wildcard_bigm(), and a
 *     bool is allocated and published through *extra_data.
 *   - SimilarityStrategyNumber: bigrams come from generate_bigm().
 *   - any other strategy is reported with elog(ERROR).
 *
 * The key count stored in *nentries is capped by bigm_gin_key_limit unless
 * that setting is 0.  Returns the Datum array of text keys.
 *
 * NOTE(review): several original lines are absent from this excerpt, so the
 * comments here describe only the statements that are shown.
 */
82 gin_extract_query_bigm(PG_FUNCTION_ARGS)
84 text *val = (text *) PG_GETARG_TEXT_P(0);
85 int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
86 StrategyNumber strategy = PG_GETARG_UINT16(2);
/* Caller-supplied output locations; this function writes through pmatch, extra_data and searchMode. */
88 bool **pmatch = (bool **) PG_GETARG_POINTER(3);
89 Pointer **extra_data = (Pointer **) PG_GETARG_POINTER(4);
90 /* bool **nullFlags = (bool **) PG_GETARG_POINTER(5); */
91 int32 *searchMode = (int32 *) PG_GETARG_POINTER(6);
92 Datum *entries = NULL;
101 case LikeStrategyNumber:
/* Payload pointer and byte length of the query, excluding the varlena header. */
103 char *str = VARDATA(val);
104 int slen = VARSIZE(val) - VARHDRSZ;
108 * For wildcard search we extract all the bigrams that every
109 * potentially-matching string must include.
111 bgm = generate_wildcard_bigm(str, slen, &removeDups);
112 bgmlen = ARRNELEM(bgm);
115 * Check whether the heap tuple fetched by index search needs to be
116 * rechecked against the query. If the search word consists of one
117 * or two characters and doesn't contain any space character, we can
118 * guarantee that the index test would be exact. That is, the heap
119 * tuple does match the query, so it doesn't need to be rechecked.
/* A single bool is allocated and stored in *extra_data; recheck points at that same bool. */
121 *extra_data = (Pointer *) palloc(sizeof(bool));
122 recheck = (bool *) *extra_data;
/* NOTE(review): the body of this branch is not visible; it is entered only for exactly one bigram with removeDups unset. */
123 if (bgmlen == 1 && !removeDups)
/* Walk the query string until slen bytes have been consumed. */
128 for (sp = str; (sp - str) < slen;)
/* Bytes with the high bit set advance by pg_mblen(); all others advance by one byte. */
136 sp += IS_HIGHBIT_SET(*sp) ? pg_mblen(sp) : 1;
143 case SimilarityStrategyNumber:
/* Similarity search uses the plain bigram generator on the payload bytes. */
145 bgm = generate_bigm(VARDATA(val), VARSIZE(val) - VARHDRSZ);
146 bgmlen = ARRNELEM(bgm);
150 elog(ERROR, "unrecognized strategy number: %d", strategy);
151 bgm = NULL; /* keep compiler quiet */
/* A bigm_gin_key_limit of 0 means no cap; otherwise at most that many keys are used. */
155 *nentries = (bigm_gin_key_limit == 0) ?
156 bgmlen : Min(bigm_gin_key_limit, bgmlen);
161 entries = (Datum *) palloc(sizeof(Datum) * *nentries);
163 for (i = 0; i < *nentries; i++)
/* palloc0 zero-fills, so every flag in *pmatch starts out false. */
/* NOTE(review): the condition guarding this allocation is not visible in this excerpt. */
170 *pmatch = (bool *) palloc0(sizeof(bool) * *nentries);
/* Copy exactly bytelen bytes of the bigram into a new text key. */
173 item = cstring_to_text_with_len(ptr->str, ptr->bytelen);
174 entries[i] = PointerGetDatum(item);
180 * If no bigram was extracted then we have to scan all the index.
/* NOTE(review): the test guarding this assignment is not visible; presumably it checks for zero keys - confirm. */
183 *searchMode = GIN_SEARCH_MODE_ALL;
185 PG_RETURN_POINTER(entries);
/*
 * gin_bigm_consistent
 *
 * Decides, from the per-key check array (argument 0), whether an indexed
 * item satisfies the query for the given strategy (argument 1), and sets
 * *recheck (argument 5).
 *
 *   - LikeStrategyNumber: *recheck is true only when bigm_enable_recheck is
 *     set and either the bool reached through extra_data is true or nkeys
 *     differs from 1.
 *   - SimilarityStrategyNumber: *recheck follows bigm_enable_recheck, and the
 *     result compares a ratio built from the number of true keys against
 *     bigm_similarity_limit.
 *   - any other strategy is reported with elog(ERROR).
 *
 * NOTE(review): the loop bodies and the final return are not visible in
 * this excerpt.
 */
189 gin_bigm_consistent(PG_FUNCTION_ARGS)
191 bool *check = (bool *) PG_GETARG_POINTER(0);
192 StrategyNumber strategy = PG_GETARG_UINT16(1);
194 /* text *query = PG_GETARG_TEXT_P(2); */
195 int32 nkeys = PG_GETARG_INT32(3);
197 Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
198 bool *recheck = (bool *) PG_GETARG_POINTER(5);
205 case LikeStrategyNumber:
207 * Don't recheck the heap tuple against the query if either
208 * pg_bigm.enable_recheck is disabled or the search word is
209 * the special one so that the index can return the exact
212 Assert(extra_data != NULL);
/* extra_data is read directly as a pointer to one bool rather than as an array of pointers. */
213 *recheck = bigm_enable_recheck &&
214 (*((bool *) extra_data) || (nkeys != 1));
216 /* Check whether all extracted bigrams are present. */
218 for (i = 0; i < nkeys; i++)
227 case SimilarityStrategyNumber:
228 /* Count the matches */
229 *recheck = bigm_enable_recheck;
231 for (i = 0; i < nkeys; i++)
/* NOTE(review): two alternative formulas for res follow; what selects between them is not visible here, presumably a preprocessor conditional - confirm. */
/* First form: ntrue / (nkeys - ntrue); the nkeys == ntrue test short-circuits before a zero divisor is used. */
237 res = (nkeys == ntrue) ? true :
238 ((((((float4) ntrue) / ((float4) (nkeys - ntrue)))) >=
239 (float4) bigm_similarity_limit) ? true : false);
/* Second form: ntrue / nkeys; the nkeys == 0 test short-circuits before a zero divisor is used. */
241 res = (nkeys == 0) ? false :
242 ((((((float4) ntrue) / ((float4) nkeys))) >=
243 (float4) bigm_similarity_limit) ? true : false);
247 elog(ERROR, "unrecognized strategy number: %d", strategy);
248 res = false; /* keep compiler quiet */
/*
 * gin_bigm_compare_partial
 *
 * Compares two text keys (arguments 0 and 1) on their first character only.
 * The byte length of each leading character is obtained with pg_mblen();
 * when the lengths are equal, the result is 0 if those bytes are identical
 * and 1 otherwise.
 *
 * NOTE(review): the statement executed when the two lengths differ is not
 * visible in this excerpt.
 */
256 gin_bigm_compare_partial(PG_FUNCTION_ARGS)
/* The _PP getters and VARDATA_ANY are used together, so either varlena header form is accepted. */
258 text *arg1 = PG_GETARG_TEXT_PP(0);
259 text *arg2 = PG_GETARG_TEXT_PP(1);
266 a1p = VARDATA_ANY(arg1);
267 a2p = VARDATA_ANY(arg2);
/* Byte length of the first character of each key. */
269 mblen1 = pg_mblen(a1p);
270 mblen2 = pg_mblen(a2p);
272 if (mblen1 != mblen2)
/* Collapse the memcmp result to 0 (bytes equal) or 1 (bytes differ). */
275 res = memcmp(a1p, a2p, mblen1) ? 1 : 0;
276 PG_RETURN_INT32(res);
280 * Report both the number of pages and the number of heap tuples
281 * that are in the pending list.
/*
 * pg_gin_pending_stats
 *
 * Reads the metapage of the index given by OID (argument 0) and returns a
 * two-column composite row: pages (int4) taken from nPendingPages and
 * tuples (int8) taken from nPendingHeapTuples.
 *
 * NOTE(review): no statement visible here verifies that the relation is a
 * GIN index before block GIN_METAPAGE_BLKNO is interpreted as
 * GinMetaPageData - confirm whether such a check is needed.
 * NOTE(review): the local declarations other than metadata, and the setup
 * of isnull, are not visible in this excerpt.
 */
284 pg_gin_pending_stats(PG_FUNCTION_ARGS)
286 Oid indexOid = PG_GETARG_OID(0);
290 GinMetaPageData *metadata;
297 * Obtain statistic information from the meta page
299 indexRel = index_open(indexOid, AccessShareLock);
300 metabuffer = ReadBuffer(indexRel, GIN_METAPAGE_BLKNO);
/* Take a share lock on the metapage buffer before looking at its contents. */
301 LockBuffer(metabuffer, GIN_SHARE);
302 metapage = BufferGetPage(metabuffer);
303 metadata = GinPageGetMeta(metapage);
/* The relation is closed here, but the buffer is not released until UnlockReleaseBuffer() further down. */
304 index_close(indexRel, AccessShareLock);
307 * Construct a tuple descriptor for the result row. This must
308 * match this function's pg_bigm--x.x.sql entry.
/* Two result columns: pages as INT4OID and tuples as INT8OID. */
310 tupdesc = CreateTemplateTupleDesc(2, false);
311 TupleDescInitEntry(tupdesc, (AttrNumber) 1,
312 "pages", INT4OID, -1, 0);
313 TupleDescInitEntry(tupdesc, (AttrNumber) 2,
314 "tuples", INT8OID, -1, 0);
315 tupdesc = BlessTupleDesc(tupdesc);
/* Both counters are copied out of the metapage while the buffer is still held. */
318 values[0] = Int32GetDatum(metadata->nPendingPages);
322 values[1] = Int64GetDatum(metadata->nPendingHeapTuples);
/* metadata points into the buffer page, so it is not used after this release. */
325 UnlockReleaseBuffer(metabuffer);
327 tuple = heap_form_tuple(tupdesc, values, isnull);
328 PG_RETURN_DATUM(HeapTupleGetDatum(tuple));