OSDN Git Service

Avoid the redundant declaration of the function prototypes.
[pgbigm/pg_bigm.git] / bigm_gin.c
1 /*-------------------------------------------------------------------------
2  *
3  * Portions Copyright (c) 2007-2012, PostgreSQL Global Development Group
4  * Portions Copyright (c) 2013-2015, NTT DATA Corporation
5  *
6  * Changelog:
7  *   2013/01/09
8  *   Support full text search using bigrams.
9  *   Author: NTT DATA Corporation
10  *
11  *-------------------------------------------------------------------------
12  */
#include "postgres.h"

#include "bigm.h"

#include "access/gin.h"
#include "access/gin_private.h"
#include "access/itup.h"
#include "access/skey.h"
#include "access/tuptoaster.h"
#include "catalog/pg_am.h"
#include "catalog/pg_type.h"
#include "funcapi.h"
#include "mb/pg_wchar.h"
#include "storage/bufmgr.h"
#include "storage/bufpage.h"
#include "tsearch/ts_locale.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/rel.h"
30
31
32 PG_FUNCTION_INFO_V1(gin_extract_value_bigm);
33 PG_FUNCTION_INFO_V1(gin_extract_query_bigm);
34 PG_FUNCTION_INFO_V1(gin_bigm_consistent);
35 PG_FUNCTION_INFO_V1(gin_bigm_compare_partial);
36 PG_FUNCTION_INFO_V1(pg_gin_pending_stats);
37
38 /*
39  * The function prototypes are created as a part of PG_FUNCTION_INFO_V1
40  * macro since 9.4, and hence the declaration of the function prototypes
41  * here is necessary only for 9.3 or before.
42  */
43 #if PG_VERSION_NUM < 90400
44 Datum           gin_extract_value_bigm(PG_FUNCTION_ARGS);
45 Datum           gin_extract_query_bigm(PG_FUNCTION_ARGS);
46 Datum           gin_bigm_consistent(PG_FUNCTION_ARGS);
47 Datum           gin_bigm_compare_partial(PG_FUNCTION_ARGS);
48 Datum           pg_gin_pending_stats(PG_FUNCTION_ARGS);
49 #endif
50
51 Datum
52 gin_extract_value_bigm(PG_FUNCTION_ARGS)
53 {
54         text       *val = (text *) PG_GETARG_TEXT_P(0);
55         int32      *nentries = (int32 *) PG_GETARG_POINTER(1);
56         Datum      *entries = NULL;
57         BIGM       *bgm;
58         int32           bgmlen;
59
60         *nentries = 0;
61
62         bgm = generate_bigm(VARDATA(val), VARSIZE(val) - VARHDRSZ);
63         bgmlen = ARRNELEM(bgm);
64
65         if (bgmlen > 0)
66         {
67                 bigm       *ptr;
68                 int32           i;
69
70                 *nentries = bgmlen;
71                 entries = (Datum *) palloc(sizeof(Datum) * bgmlen);
72
73                 ptr = GETARR(bgm);
74                 for (i = 0; i < bgmlen; i++)
75                 {
76                         text            *item = cstring_to_text_with_len(ptr->str, ptr->bytelen);
77                         entries[i] = PointerGetDatum(item);
78                         ptr++;
79                 }
80         }
81
82         PG_RETURN_POINTER(entries);
83 }
84
85 Datum
86 gin_extract_query_bigm(PG_FUNCTION_ARGS)
87 {
88         text       *val = (text *) PG_GETARG_TEXT_P(0);
89         int32      *nentries = (int32 *) PG_GETARG_POINTER(1);
90         StrategyNumber strategy = PG_GETARG_UINT16(2);
91
92         bool   **pmatch = (bool **) PG_GETARG_POINTER(3);
93         Pointer   **extra_data = (Pointer **) PG_GETARG_POINTER(4);
94         /* bool   **nullFlags = (bool **) PG_GETARG_POINTER(5); */
95         int32      *searchMode = (int32 *) PG_GETARG_POINTER(6);
96         Datum      *entries = NULL;
97         BIGM       *bgm;
98         int32           bgmlen = 0;
99         bigm       *ptr;
100         int32           i;
101         bool            removeDups;
102
103         switch (strategy)
104         {
105                 case LikeStrategyNumber:
106                 {
107                         char    *str = VARDATA(val);
108                         int             slen = VARSIZE(val) - VARHDRSZ;
109                         bool    *recheck;
110
111                         /*
112                          * For wildcard search we extract all the bigrams that every
113                          * potentially-matching string must include.
114                          */
115                         bgm = generate_wildcard_bigm(str, slen, &removeDups);
116                         bgmlen = ARRNELEM(bgm);
117
118                         /*
119                          * Check whether the heap tuple fetched by index search needs to be
120                          * rechecked against the query. If the search word consists of one
121                          * or two characters and doesn't contain any space character, we can
122                          * guarantee that the index test would be exact. That is, the heap
123                          * tuple does match the query, so it doesn't need to be rechecked.
124                          */
125                         *extra_data = (Pointer *) palloc(sizeof(bool));
126                         recheck = (bool *) *extra_data;
127                         if (bgmlen == 1 && !removeDups)
128                         {
129                                 const char      *sp;
130
131                                 *recheck = false;
132                                 for (sp = str; (sp - str) < slen;)
133                                 {
134                                         if (t_isspace(sp))
135                                         {
136                                                 *recheck = true;
137                                                 break;
138                                         }
139
140                                         sp += IS_HIGHBIT_SET(*sp) ? pg_mblen(sp) : 1;
141                                 }
142                         }
143                         else
144                                 *recheck = true;
145                         break;
146                 }
147                 case SimilarityStrategyNumber:
148                 {
149                         bgm = generate_bigm(VARDATA(val), VARSIZE(val) - VARHDRSZ);
150                         bgmlen = ARRNELEM(bgm);
151                         break;
152                 }
153                 default:
154                         elog(ERROR, "unrecognized strategy number: %d", strategy);
155                         bgm = NULL;                     /* keep compiler quiet */
156                         break;
157         }
158
159         *nentries = (bigm_gin_key_limit == 0) ?
160                 bgmlen : Min(bigm_gin_key_limit, bgmlen);
161         *pmatch = NULL;
162
163         if (*nentries > 0)
164         {
165                 entries = (Datum *) palloc(sizeof(Datum) * *nentries);
166                 ptr = GETARR(bgm);
167                 for (i = 0; i < *nentries; i++)
168                 {
169                         text            *item;
170
171                         if (ptr->pmatch)
172                         {
173                                 if (*pmatch == NULL)
174                                         *pmatch = (bool *) palloc0(sizeof(bool) * *nentries);
175                                 (*pmatch)[i] = true;
176                         }
177                         item = cstring_to_text_with_len(ptr->str, ptr->bytelen);
178                         entries[i] = PointerGetDatum(item);
179                         ptr++;
180                 }
181         }
182
183         /*
184          * If no bigram was extracted then we have to scan all the index.
185          */
186         if (*nentries == 0)
187                 *searchMode = GIN_SEARCH_MODE_ALL;
188
189         PG_RETURN_POINTER(entries);
190 }
191
192 Datum
193 gin_bigm_consistent(PG_FUNCTION_ARGS)
194 {
195         bool       *check = (bool *) PG_GETARG_POINTER(0);
196         StrategyNumber strategy = PG_GETARG_UINT16(1);
197
198         /* text    *query = PG_GETARG_TEXT_P(2); */
199         int32           nkeys = PG_GETARG_INT32(3);
200
201         Pointer   *extra_data = (Pointer *) PG_GETARG_POINTER(4);
202         bool       *recheck = (bool *) PG_GETARG_POINTER(5);
203         bool            res;
204         int32           i;
205         int32           ntrue;
206
207         switch (strategy)
208         {
209                 case LikeStrategyNumber:
210                         /*
211                          * Don't recheck the heap tuple against the query if either
212                          * pg_bigm.enable_recheck is disabled or the search word is
213                          * the special one so that the index can return the exact
214                          * result.
215                          */
216                         Assert(extra_data != NULL);
217                         *recheck = bigm_enable_recheck &&
218                                 (*((bool *) extra_data) || (nkeys != 1));
219
220                         /* Check if all extracted bigrams are presented. */
221                         res = true;
222                         for (i = 0; i < nkeys; i++)
223                         {
224                                 if (!check[i])
225                                 {
226                                         res = false;
227                                         break;
228                                 }
229                         }
230                         break;
231                 case SimilarityStrategyNumber:
232                         /* Count the matches */
233                         *recheck = bigm_enable_recheck;
234                         ntrue = 0;
235                         for (i = 0; i < nkeys; i++)
236                         {
237                                 if (check[i])
238                                         ntrue++;
239                         }
240 #ifdef DIVUNION
241                         res = (nkeys == ntrue) ? true :
242                                 ((((((float4) ntrue) / ((float4) (nkeys - ntrue)))) >=
243                                   (float4) bigm_similarity_limit) ? true : false);
244 #else
245                         res = (nkeys == 0) ? false :
246                                 ((((((float4) ntrue) / ((float4) nkeys))) >=
247                                   (float4) bigm_similarity_limit) ? true : false);
248 #endif
249                         break;
250                 default:
251                         elog(ERROR, "unrecognized strategy number: %d", strategy);
252                         res = false;            /* keep compiler quiet */
253                         break;
254         }
255
256         PG_RETURN_BOOL(res);
257 }
258
259 Datum
260 gin_bigm_compare_partial(PG_FUNCTION_ARGS)
261 {
262         text    *arg1 = PG_GETARG_TEXT_PP(0);
263         text    *arg2 = PG_GETARG_TEXT_PP(1);
264         char    *a1p;
265         char    *a2p;
266         int             mblen1;
267         int             mblen2;
268         int             res;
269
270         a1p = VARDATA_ANY(arg1);
271         a2p = VARDATA_ANY(arg2);
272
273         mblen1 = pg_mblen(a1p);
274         mblen2 = pg_mblen(a2p);
275
276         if (mblen1 != mblen2)
277                 PG_RETURN_INT32(1);
278
279         res = memcmp(a1p, a2p, mblen1) ? 1 : 0;
280         PG_RETURN_INT32(res);
281 }
282
283 /*
284  * Report both number of pages and number of heap tuples that
285  * are in the pending list.
286  */
287 Datum
288 pg_gin_pending_stats(PG_FUNCTION_ARGS)
289 {
290         Oid                     indexOid = PG_GETARG_OID(0);
291         Relation        indexRel;
292         Buffer          metabuffer;
293         Page            metapage;
294         GinMetaPageData *metadata;
295         Datum           values[2];
296         bool            isnull[2];
297         HeapTuple       tuple;
298         TupleDesc       tupdesc;
299
300         /*
301          * Obtain statistic information from the meta page
302          */
303         indexRel = index_open(indexOid, AccessShareLock);
304         metabuffer = ReadBuffer(indexRel, GIN_METAPAGE_BLKNO);
305         LockBuffer(metabuffer, GIN_SHARE);
306         metapage = BufferGetPage(metabuffer);
307         metadata = GinPageGetMeta(metapage);
308         index_close(indexRel, AccessShareLock);
309
310         /*
311          * Construct a tuple descriptor for the result row. This must
312          * match this function's pg_bigm--x.x.sql entry.
313          */
314         tupdesc = CreateTemplateTupleDesc(2, false);
315         TupleDescInitEntry(tupdesc, (AttrNumber) 1,
316                                            "pages", INT4OID, -1, 0);
317         TupleDescInitEntry(tupdesc, (AttrNumber) 2,
318                                            "tuples", INT8OID, -1, 0);
319         tupdesc = BlessTupleDesc(tupdesc);
320
321         /* pages */
322         values[0] = Int32GetDatum(metadata->nPendingPages);
323         isnull[0] = false;
324
325         /* tuples */
326         values[1] = Int64GetDatum(metadata->nPendingHeapTuples);
327         isnull[1] = false;
328
329         UnlockReleaseBuffer(metabuffer);
330
331         tuple = heap_form_tuple(tupdesc, values, isnull);
332         PG_RETURN_DATUM(HeapTupleGetDatum(tuple));
333 }