OSDN Git Service

Make gin_bigm_ops use triConsistent function only with PostgreSQL 9.4 or later.
[pgbigm/pg_bigm.git] / bigm_gin.c
1 /*-------------------------------------------------------------------------
2  *
3  * Portions Copyright (c) 2007-2012, PostgreSQL Global Development Group
4  * Portions Copyright (c) 2013-2015, NTT DATA Corporation
5  *
6  * Changelog:
7  *       2013/01/09
8  *       Support full text search using bigrams.
9  *       Author: NTT DATA Corporation
10  *
11  *-------------------------------------------------------------------------
12  */
13 #include "postgres.h"
14
15 #include "bigm.h"
16
17 #include "access/gin.h"
18 #include "access/gin_private.h"
19 #include "access/itup.h"
20 #include "access/skey.h"
21 #include "access/tuptoaster.h"
22 #include "catalog/pg_type.h"
23 #include "funcapi.h"
24 #include "mb/pg_wchar.h"
25 #include "storage/bufmgr.h"
26 #include "storage/bufpage.h"
27 #include "tsearch/ts_locale.h"
28 #include "utils/array.h"
29 #include "utils/builtins.h"
30
31
32 PG_FUNCTION_INFO_V1(gin_extract_value_bigm);
33 PG_FUNCTION_INFO_V1(gin_extract_query_bigm);
34 PG_FUNCTION_INFO_V1(gin_bigm_consistent);
35 PG_FUNCTION_INFO_V1(gin_bigm_compare_partial);
36 PG_FUNCTION_INFO_V1(pg_gin_pending_stats);
37
38 /* triConsistent function is available only in 9.4 or later */
39 #if PG_VERSION_NUM >= 90400
40 PG_FUNCTION_INFO_V1(gin_bigm_triconsistent);
41 #endif
42
43 /*
44  * The function prototypes are created as a part of PG_FUNCTION_INFO_V1
45  * macro since 9.4, and hence the declaration of the function prototypes
46  * here is necessary only for 9.3 or before.
47  */
48 #if PG_VERSION_NUM < 90400
49 Datum           gin_extract_value_bigm(PG_FUNCTION_ARGS);
50 Datum           gin_extract_query_bigm(PG_FUNCTION_ARGS);
51 Datum           gin_bigm_consistent(PG_FUNCTION_ARGS);
52 Datum           gin_bigm_compare_partial(PG_FUNCTION_ARGS);
53 Datum           pg_gin_pending_stats(PG_FUNCTION_ARGS);
54 #endif
55
56 Datum
57 gin_extract_value_bigm(PG_FUNCTION_ARGS)
58 {
59         text       *val = (text *) PG_GETARG_TEXT_P(0);
60         int32      *nentries = (int32 *) PG_GETARG_POINTER(1);
61         Datum      *entries = NULL;
62         BIGM       *bgm;
63         int32           bgmlen;
64
65         *nentries = 0;
66
67         bgm = generate_bigm(VARDATA(val), VARSIZE(val) - VARHDRSZ);
68         bgmlen = ARRNELEM(bgm);
69
70         if (bgmlen > 0)
71         {
72                 bigm       *ptr;
73                 int32           i;
74
75                 *nentries = bgmlen;
76                 entries = (Datum *) palloc(sizeof(Datum) * bgmlen);
77
78                 ptr = GETARR(bgm);
79                 for (i = 0; i < bgmlen; i++)
80                 {
81                         text       *item = cstring_to_text_with_len(ptr->str, ptr->bytelen);
82
83                         entries[i] = PointerGetDatum(item);
84                         ptr++;
85                 }
86         }
87
88         PG_RETURN_POINTER(entries);
89 }
90
91 Datum
92 gin_extract_query_bigm(PG_FUNCTION_ARGS)
93 {
94         text       *val = (text *) PG_GETARG_TEXT_P(0);
95         int32      *nentries = (int32 *) PG_GETARG_POINTER(1);
96         StrategyNumber strategy = PG_GETARG_UINT16(2);
97
98         bool      **pmatch = (bool **) PG_GETARG_POINTER(3);
99         Pointer   **extra_data = (Pointer **) PG_GETARG_POINTER(4);
100
101         /* bool   **nullFlags = (bool **) PG_GETARG_POINTER(5); */
102         int32      *searchMode = (int32 *) PG_GETARG_POINTER(6);
103         Datum      *entries = NULL;
104         BIGM       *bgm;
105         int32           bgmlen = 0;
106         bigm       *ptr;
107         int32           i;
108         bool            removeDups;
109
110         switch (strategy)
111         {
112                 case LikeStrategyNumber:
113                 {
114                         char       *str = VARDATA(val);
115                         int                     slen = VARSIZE(val) - VARHDRSZ;
116                         bool       *recheck;
117
118                         /*
119                          * For wildcard search we extract all the bigrams that every
120                          * potentially-matching string must include.
121                          */
122                         bgm = generate_wildcard_bigm(str, slen, &removeDups);
123                         bgmlen = ARRNELEM(bgm);
124
125                         /*
126                          * Check whether the heap tuple fetched by index search needs to
127                          * be rechecked against the query. If the search word consists of
128                          * one or two characters and doesn't contain any space character,
129                          * we can guarantee that the index test would be exact. That is,
130                          * the heap tuple does match the query, so it doesn't need to be
131                          * rechecked.
132                          */
133                         *extra_data = (Pointer *) palloc(sizeof(bool));
134                         recheck = (bool *) *extra_data;
135                         if (bgmlen == 1 && !removeDups)
136                         {
137                                 const char *sp;
138
139                                 *recheck = false;
140                                 for (sp = str; (sp - str) < slen;)
141                                 {
142                                         if (t_isspace(sp))
143                                         {
144                                                 *recheck = true;
145                                                 break;
146                                         }
147
148                                         sp += IS_HIGHBIT_SET(*sp) ? pg_mblen(sp) : 1;
149                                 }
150                         }
151                         else
152                                 *recheck = true;
153                         break;
154                 }
155                 case SimilarityStrategyNumber:
156                 {
157                         bgm = generate_bigm(VARDATA(val), VARSIZE(val) - VARHDRSZ);
158                         bgmlen = ARRNELEM(bgm);
159                         break;
160                 }
161                 default:
162                         elog(ERROR, "unrecognized strategy number: %d", strategy);
163                         bgm = NULL;                     /* keep compiler quiet */
164                         break;
165         }
166
167         *nentries = (bigm_gin_key_limit == 0) ?
168                 bgmlen : Min(bigm_gin_key_limit, bgmlen);
169         *pmatch = NULL;
170
171         if (*nentries > 0)
172         {
173                 entries = (Datum *) palloc(sizeof(Datum) * *nentries);
174                 ptr = GETARR(bgm);
175                 for (i = 0; i < *nentries; i++)
176                 {
177                         text       *item;
178
179                         if (ptr->pmatch)
180                         {
181                                 if (*pmatch == NULL)
182                                         *pmatch = (bool *) palloc0(sizeof(bool) * *nentries);
183                                 (*pmatch)[i] = true;
184                         }
185                         item = cstring_to_text_with_len(ptr->str, ptr->bytelen);
186                         entries[i] = PointerGetDatum(item);
187                         ptr++;
188                 }
189         }
190
191         /*
192          * If no bigram was extracted then we have to scan all the index.
193          */
194         if (*nentries == 0)
195                 *searchMode = GIN_SEARCH_MODE_ALL;
196
197         PG_RETURN_POINTER(entries);
198 }
199
200 Datum
201 gin_bigm_consistent(PG_FUNCTION_ARGS)
202 {
203         bool       *check = (bool *) PG_GETARG_POINTER(0);
204         StrategyNumber strategy = PG_GETARG_UINT16(1);
205
206         /* text    *query = PG_GETARG_TEXT_P(2); */
207         int32           nkeys = PG_GETARG_INT32(3);
208
209         Pointer    *extra_data = (Pointer *) PG_GETARG_POINTER(4);
210         bool       *recheck = (bool *) PG_GETARG_POINTER(5);
211         bool            res;
212         int32           i;
213         int32           ntrue;
214
215         switch (strategy)
216         {
217                 case LikeStrategyNumber:
218
219                         /*
220                          * Don't recheck the heap tuple against the query if either
221                          * pg_bigm.enable_recheck is disabled or the search word is the
222                          * special one so that the index can return the exact result.
223                          */
224                         Assert(extra_data != NULL);
225                         *recheck = bigm_enable_recheck &&
226                                 (*((bool *) extra_data) || (nkeys != 1));
227
228                         /* Check if all extracted bigrams are presented. */
229                         res = true;
230                         for (i = 0; i < nkeys; i++)
231                         {
232                                 if (!check[i])
233                                 {
234                                         res = false;
235                                         break;
236                                 }
237                         }
238                         break;
239                 case SimilarityStrategyNumber:
240                         /* Count the matches */
241                         *recheck = bigm_enable_recheck;
242                         ntrue = 0;
243                         for (i = 0; i < nkeys; i++)
244                         {
245                                 if (check[i])
246                                         ntrue++;
247                         }
248 #ifdef DIVUNION
249                         res = (nkeys == ntrue) ? true :
250                                 ((((((float4) ntrue) / ((float4) (nkeys - ntrue)))) >=
251                                   (float4) bigm_similarity_limit) ? true : false);
252 #else
253                         res = (nkeys == 0) ? false :
254                                 ((((((float4) ntrue) / ((float4) nkeys))) >=
255                                   (float4) bigm_similarity_limit) ? true : false);
256 #endif
257                         break;
258                 default:
259                         elog(ERROR, "unrecognized strategy number: %d", strategy);
260                         res = false;            /* keep compiler quiet */
261                         break;
262         }
263
264         PG_RETURN_BOOL(res);
265 }
266
267 /* triConsistent function is available only in 9.4 or later */
268 #if PG_VERSION_NUM >= 90400
269 Datum
270 gin_bigm_triconsistent(PG_FUNCTION_ARGS)
271 {
272         GinTernaryValue  *check = (GinTernaryValue *) PG_GETARG_POINTER(0);
273         StrategyNumber strategy = PG_GETARG_UINT16(1);
274
275         /* text    *query = PG_GETARG_TEXT_P(2); */
276         int32           nkeys = PG_GETARG_INT32(3);
277         Pointer    *extra_data = (Pointer *) PG_GETARG_POINTER(4);
278         GinTernaryValue res = GIN_MAYBE;
279         int32           i,
280                                 ntrue;
281
282         switch (strategy)
283         {
284                 case LikeStrategyNumber:
285                         /*
286                          * Don't recheck the heap tuple against the query if either
287                          * pg_bigm.enable_recheck is disabled or the search word is the
288                          * special one so that the index can return the exact result.
289                          */
290                         res = (bigm_enable_recheck &&
291                                    (*((bool *) extra_data) || (nkeys != 1))) ?
292                                 GIN_MAYBE : GIN_TRUE;
293
294                         /* Check if all extracted bigrams are presented. */
295                         for (i = 0; i < nkeys; i++)
296                         {
297                                 if (check[i] == GIN_FALSE)
298                                 {
299                                         res = GIN_FALSE;
300                                         break;
301                                 }
302                         }
303                         break;
304                 case SimilarityStrategyNumber:
305                         /* Count the matches */
306                         ntrue = 0;
307                         for (i = 0; i < nkeys; i++)
308                         {
309                                 if (check[i] != GIN_FALSE)
310                                         ntrue++;
311                         }
312 #ifdef DIVUNION
313                         res = (nkeys == ntrue) ? GIN_MAYBE :
314                                 (((((float4) ntrue) / ((float4) (nkeys - ntrue))) >=
315                                   (float4) bigm_similarity_limit) ? GIN_MAYBE : GIN_FALSE);
316 #else
317                         res = (nkeys == 0) ? GIN_FALSE :
318                                 (((((float4) ntrue) / ((float4) nkeys)) >=
319                                   (float4) bigm_similarity_limit) ? GIN_MAYBE : GIN_FALSE);
320 #endif
321                         if (res != GIN_FALSE && !bigm_enable_recheck)
322                                 res = GIN_TRUE;
323                         break;
324                 default:
325                         elog(ERROR, "unrecognized strategy number: %d", strategy);
326                         res = GIN_FALSE;                /* keep compiler quiet */
327                         break;
328         }
329
330         PG_RETURN_GIN_TERNARY_VALUE(res);
331 }
332 #endif  /* PG_VERSION_NUM >= 90400 */
333
334 Datum
335 gin_bigm_compare_partial(PG_FUNCTION_ARGS)
336 {
337         text       *arg1 = PG_GETARG_TEXT_PP(0);
338         text       *arg2 = PG_GETARG_TEXT_PP(1);
339         char       *a1p;
340         char       *a2p;
341         int                     mblen1;
342         int                     mblen2;
343         int                     res;
344
345         a1p = VARDATA_ANY(arg1);
346         a2p = VARDATA_ANY(arg2);
347
348         mblen1 = pg_mblen(a1p);
349         mblen2 = pg_mblen(a2p);
350
351         if (mblen1 != mblen2)
352                 PG_RETURN_INT32(1);
353
354         res = memcmp(a1p, a2p, mblen1) ? 1 : 0;
355         PG_RETURN_INT32(res);
356 }
357
358 /*
359  * Report both number of pages and number of heap tuples that
360  * are in the pending list.
361  */
362 Datum
363 pg_gin_pending_stats(PG_FUNCTION_ARGS)
364 {
365         Oid                     indexOid = PG_GETARG_OID(0);
366         Relation        indexRel;
367         Buffer          metabuffer;
368         Page            metapage;
369         GinMetaPageData *metadata;
370         Datum           values[2];
371         bool            isnull[2];
372         HeapTuple       tuple;
373         TupleDesc       tupdesc;
374
375         /*
376          * Obtain statistic information from the meta page
377          */
378         indexRel = index_open(indexOid, AccessShareLock);
379         metabuffer = ReadBuffer(indexRel, GIN_METAPAGE_BLKNO);
380         LockBuffer(metabuffer, GIN_SHARE);
381         metapage = BufferGetPage(metabuffer);
382         metadata = GinPageGetMeta(metapage);
383         index_close(indexRel, AccessShareLock);
384
385         /*
386          * Construct a tuple descriptor for the result row. This must match this
387          * function's pg_bigm--x.x.sql entry.
388          */
389         tupdesc = CreateTemplateTupleDesc(2, false);
390         TupleDescInitEntry(tupdesc, (AttrNumber) 1,
391                                            "pages", INT4OID, -1, 0);
392         TupleDescInitEntry(tupdesc, (AttrNumber) 2,
393                                            "tuples", INT8OID, -1, 0);
394         tupdesc = BlessTupleDesc(tupdesc);
395
396         /* pages */
397         values[0] = Int32GetDatum(metadata->nPendingPages);
398         isnull[0] = false;
399
400         /* tuples */
401         values[1] = Int64GetDatum(metadata->nPendingHeapTuples);
402         isnull[1] = false;
403
404         UnlockReleaseBuffer(metabuffer);
405
406         tuple = heap_form_tuple(tupdesc, values, isnull);
407         PG_RETURN_DATUM(HeapTupleGetDatum(tuple));
408 }