OSDN Git Service

Standard pgindent run for 8.1.
[pg-rex/syncrep.git] / contrib / tsearch2 / tsvector.c
1 /*
2  * In/Out definitions for tsvector type
3  * Internal structure:
4  * string of values, array of lexeme positions in the string and their lengths
5  * Teodor Sigaev <teodor@sigaev.ru>
6  */
7 #include "postgres.h"
8
9 #include "access/gist.h"
10 #include "access/itup.h"
11 #include "utils/builtins.h"
12 #include "storage/bufpage.h"
13 #include "executor/spi.h"
14 #include "commands/trigger.h"
15 #include "nodes/pg_list.h"
16 #include "catalog/namespace.h"
17
18 #include "utils/pg_locale.h"
19
20 #include <ctype.h>                              /* tolower */
21 #include "tsvector.h"
22 #include "query.h"
23 #include "ts_cfg.h"
24 #include "common.h"
25
26 PG_FUNCTION_INFO_V1(tsvector_in);
27 Datum           tsvector_in(PG_FUNCTION_ARGS);
28
29 PG_FUNCTION_INFO_V1(tsvector_out);
30 Datum           tsvector_out(PG_FUNCTION_ARGS);
31
32 PG_FUNCTION_INFO_V1(to_tsvector);
33 Datum           to_tsvector(PG_FUNCTION_ARGS);
34
35 PG_FUNCTION_INFO_V1(to_tsvector_current);
36 Datum           to_tsvector_current(PG_FUNCTION_ARGS);
37
38 PG_FUNCTION_INFO_V1(to_tsvector_name);
39 Datum           to_tsvector_name(PG_FUNCTION_ARGS);
40
41 PG_FUNCTION_INFO_V1(tsearch2);
42 Datum           tsearch2(PG_FUNCTION_ARGS);
43
44 PG_FUNCTION_INFO_V1(tsvector_length);
45 Datum           tsvector_length(PG_FUNCTION_ARGS);
46
47 /*
48  * in/out text index type
49  */
50 static int
51 comparePos(const void *a, const void *b)
52 {
53         if (WEP_GETPOS(*(WordEntryPos *) a) == WEP_GETPOS(*(WordEntryPos *) b))
54                 return 0;
55         return (WEP_GETPOS(*(WordEntryPos *) a) > WEP_GETPOS(*(WordEntryPos *) b)) ? 1 : -1;
56 }
57
58 static int
59 uniquePos(WordEntryPos * a, int4 l)
60 {
61         WordEntryPos *ptr,
62                            *res;
63
64         res = a;
65         if (l == 1)
66                 return l;
67
68         qsort((void *) a, l, sizeof(WordEntryPos), comparePos);
69
70         ptr = a + 1;
71         while (ptr - a < l)
72         {
73                 if (WEP_GETPOS(*ptr) != WEP_GETPOS(*res))
74                 {
75                         res++;
76                         *res = *ptr;
77                         if (res - a >= MAXNUMPOS - 1 || WEP_GETPOS(*res) == MAXENTRYPOS - 1)
78                                 break;
79                 }
80                 else if (WEP_GETWEIGHT(*ptr) > WEP_GETWEIGHT(*res))
81                         WEP_SETWEIGHT(*res, WEP_GETWEIGHT(*ptr));
82                 ptr++;
83         }
84         return res + 1 - a;
85 }
86
87 static char *BufferStr;
88 static int
89 compareentry(const void *a, const void *b)
90 {
91         if (((WordEntryIN *) a)->entry.len == ((WordEntryIN *) b)->entry.len)
92         {
93                 return strncmp(
94                                            &BufferStr[((WordEntryIN *) a)->entry.pos],
95                                            &BufferStr[((WordEntryIN *) b)->entry.pos],
96                                            ((WordEntryIN *) a)->entry.len);
97         }
98         return (((WordEntryIN *) a)->entry.len > ((WordEntryIN *) b)->entry.len) ? 1 : -1;
99 }
100
101 static int
102 uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
103 {
104         WordEntryIN *ptr,
105                            *res;
106
107         res = a;
108         if (l == 1)
109         {
110                 if (a->entry.haspos)
111                 {
112                         *(uint16 *) (a->pos) = uniquePos(&(a->pos[1]), *(uint16 *) (a->pos));
113                         *outbuflen = SHORTALIGN(res->entry.len) + (*(uint16 *) (a->pos) + 1) * sizeof(WordEntryPos);
114                 }
115                 return l;
116         }
117
118         ptr = a + 1;
119         BufferStr = buf;
120         qsort((void *) a, l, sizeof(WordEntryIN), compareentry);
121
122         while (ptr - a < l)
123         {
124                 if (!(ptr->entry.len == res->entry.len &&
125                           strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos], res->entry.len) == 0))
126                 {
127                         if (res->entry.haspos)
128                         {
129                                 *(uint16 *) (res->pos) = uniquePos(&(res->pos[1]), *(uint16 *) (res->pos));
130                                 *outbuflen += *(uint16 *) (res->pos) * sizeof(WordEntryPos);
131                         }
132                         *outbuflen += SHORTALIGN(res->entry.len);
133                         res++;
134                         memcpy(res, ptr, sizeof(WordEntryIN));
135                 }
136                 else if (ptr->entry.haspos)
137                 {
138                         if (res->entry.haspos)
139                         {
140                                 int4            len = *(uint16 *) (ptr->pos) + 1 + *(uint16 *) (res->pos);
141
142                                 res->pos = (WordEntryPos *) repalloc(res->pos, len * sizeof(WordEntryPos));
143                                 memcpy(&(res->pos[*(uint16 *) (res->pos) + 1]),
144                                            &(ptr->pos[1]), *(uint16 *) (ptr->pos) * sizeof(WordEntryPos));
145                                 *(uint16 *) (res->pos) += *(uint16 *) (ptr->pos);
146                                 pfree(ptr->pos);
147                         }
148                         else
149                         {
150                                 res->entry.haspos = 1;
151                                 res->pos = ptr->pos;
152                         }
153                 }
154                 ptr++;
155         }
156         if (res->entry.haspos)
157         {
158                 *(uint16 *) (res->pos) = uniquePos(&(res->pos[1]), *(uint16 *) (res->pos));
159                 *outbuflen += *(uint16 *) (res->pos) * sizeof(WordEntryPos);
160         }
161         *outbuflen += SHORTALIGN(res->entry.len);
162
163         return res + 1 - a;
164 }
165
/*
 * States of the gettoken_tsvector() scanner
 */
#define WAITWORD		1		/* looking for the first character of a word */
#define WAITENDWORD		2		/* inside an unquoted word */
#define WAITNEXTCHAR	3		/* previous character was a backslash */
#define WAITENDCMPLX	4		/* inside a quoted word */
#define WAITPOSINFO		5		/* after the closing quote, ':' may follow */
#define INPOSINFO		6		/* a position number must start here */
#define WAITPOSDELIM	7		/* after a position: ',' or weight may follow */

/*
 * Double the word buffer of "state" when fewer than two free bytes remain
 * behind curpos; curpos is re-based onto the new buffer.
 */
#define RESIZEPRSBUF \
do { \
	int4 used_ = state->curpos - state->word; \
	if ( used_ + 1 >= state->len ) \
	{ \
		state->len *= 2; \
		state->word = (char*)repalloc( (void*)state->word, state->len ); \
		state->curpos = state->word + used_; \
	} \
} while (0)
184
185 int4
186 gettoken_tsvector(TI_IN_STATE * state)
187 {
188         int4            oldstate = 0;
189
190         state->curpos = state->word;
191         state->state = WAITWORD;
192         state->alen = 0;
193
194         while (1)
195         {
196                 if (state->state == WAITWORD)
197                 {
198                         if (*(state->prsbuf) == '\0')
199                                 return 0;
200                         else if (*(state->prsbuf) == '\'')
201                                 state->state = WAITENDCMPLX;
202                         else if (*(state->prsbuf) == '\\')
203                         {
204                                 state->state = WAITNEXTCHAR;
205                                 oldstate = WAITENDWORD;
206                         }
207                         else if (state->oprisdelim && ISOPERATOR(*(state->prsbuf)))
208                                 ereport(ERROR,
209                                                 (errcode(ERRCODE_SYNTAX_ERROR),
210                                                  errmsg("syntax error")));
211                         else if (*(state->prsbuf) != ' ')
212                         {
213                                 *(state->curpos) = *(state->prsbuf);
214                                 state->curpos++;
215                                 state->state = WAITENDWORD;
216                         }
217                 }
218                 else if (state->state == WAITNEXTCHAR)
219                 {
220                         if (*(state->prsbuf) == '\0')
221                                 ereport(ERROR,
222                                                 (errcode(ERRCODE_SYNTAX_ERROR),
223                                                  errmsg("there is no escaped character")));
224                         else
225                         {
226                                 RESIZEPRSBUF;
227                                 *(state->curpos) = *(state->prsbuf);
228                                 state->curpos++;
229                                 state->state = oldstate;
230                         }
231                 }
232                 else if (state->state == WAITENDWORD)
233                 {
234                         if (*(state->prsbuf) == '\\')
235                         {
236                                 state->state = WAITNEXTCHAR;
237                                 oldstate = WAITENDWORD;
238                         }
239                         else if (*(state->prsbuf) == ' ' || *(state->prsbuf) == '\0' ||
240                                          (state->oprisdelim && ISOPERATOR(*(state->prsbuf))))
241                         {
242                                 RESIZEPRSBUF;
243                                 if (state->curpos == state->word)
244                                         ereport(ERROR,
245                                                         (errcode(ERRCODE_SYNTAX_ERROR),
246                                                          errmsg("syntax error")));
247                                 *(state->curpos) = '\0';
248                                 return 1;
249                         }
250                         else if (*(state->prsbuf) == ':')
251                         {
252                                 if (state->curpos == state->word)
253                                         ereport(ERROR,
254                                                         (errcode(ERRCODE_SYNTAX_ERROR),
255                                                          errmsg("syntax error")));
256                                 *(state->curpos) = '\0';
257                                 if (state->oprisdelim)
258                                         return 1;
259                                 else
260                                         state->state = INPOSINFO;
261                         }
262                         else
263                         {
264                                 RESIZEPRSBUF;
265                                 *(state->curpos) = *(state->prsbuf);
266                                 state->curpos++;
267                         }
268                 }
269                 else if (state->state == WAITENDCMPLX)
270                 {
271                         if (*(state->prsbuf) == '\'')
272                         {
273                                 RESIZEPRSBUF;
274                                 *(state->curpos) = '\0';
275                                 if (state->curpos == state->word)
276                                         ereport(ERROR,
277                                                         (errcode(ERRCODE_SYNTAX_ERROR),
278                                                          errmsg("syntax error")));
279                                 if (state->oprisdelim)
280                                 {
281                                         state->prsbuf++;
282                                         return 1;
283                                 }
284                                 else
285                                         state->state = WAITPOSINFO;
286                         }
287                         else if (*(state->prsbuf) == '\\')
288                         {
289                                 state->state = WAITNEXTCHAR;
290                                 oldstate = WAITENDCMPLX;
291                         }
292                         else if (*(state->prsbuf) == '\0')
293                                 ereport(ERROR,
294                                                 (errcode(ERRCODE_SYNTAX_ERROR),
295                                                  errmsg("syntax error")));
296                         else
297                         {
298                                 RESIZEPRSBUF;
299                                 *(state->curpos) = *(state->prsbuf);
300                                 state->curpos++;
301                         }
302                 }
303                 else if (state->state == WAITPOSINFO)
304                 {
305                         if (*(state->prsbuf) == ':')
306                                 state->state = INPOSINFO;
307                         else
308                                 return 1;
309                 }
310                 else if (state->state == INPOSINFO)
311                 {
312                         if (isdigit((unsigned char) *(state->prsbuf)))
313                         {
314                                 if (state->alen == 0)
315                                 {
316                                         state->alen = 4;
317                                         state->pos = (WordEntryPos *) palloc(sizeof(WordEntryPos) * state->alen);
318                                         *(uint16 *) (state->pos) = 0;
319                                 }
320                                 else if (*(uint16 *) (state->pos) + 1 >= state->alen)
321                                 {
322                                         state->alen *= 2;
323                                         state->pos = (WordEntryPos *) repalloc(state->pos, sizeof(WordEntryPos) * state->alen);
324                                 }
325                                 (*(uint16 *) (state->pos))++;
326                                 WEP_SETPOS(state->pos[*(uint16 *) (state->pos)], LIMITPOS(atoi(state->prsbuf)));
327                                 if (WEP_GETPOS(state->pos[*(uint16 *) (state->pos)]) == 0)
328                                         ereport(ERROR,
329                                                         (errcode(ERRCODE_SYNTAX_ERROR),
330                                                          errmsg("wrong position info")));
331                                 WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 0);
332                                 state->state = WAITPOSDELIM;
333                         }
334                         else
335                                 ereport(ERROR,
336                                                 (errcode(ERRCODE_SYNTAX_ERROR),
337                                                  errmsg("syntax error")));
338                 }
339                 else if (state->state == WAITPOSDELIM)
340                 {
341                         if (*(state->prsbuf) == ',')
342                                 state->state = INPOSINFO;
343                         else if (tolower(*(state->prsbuf)) == 'a' || *(state->prsbuf) == '*')
344                         {
345                                 if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)]))
346                                         ereport(ERROR,
347                                                         (errcode(ERRCODE_SYNTAX_ERROR),
348                                                          errmsg("syntax error")));
349                                 WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 3);
350                         }
351                         else if (tolower(*(state->prsbuf)) == 'b')
352                         {
353                                 if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)]))
354                                         ereport(ERROR,
355                                                         (errcode(ERRCODE_SYNTAX_ERROR),
356                                                          errmsg("syntax error")));
357                                 WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 2);
358                         }
359                         else if (tolower(*(state->prsbuf)) == 'c')
360                         {
361                                 if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)]))
362                                         ereport(ERROR,
363                                                         (errcode(ERRCODE_SYNTAX_ERROR),
364                                                          errmsg("syntax error")));
365                                 WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 1);
366                         }
367                         else if (tolower(*(state->prsbuf)) == 'd')
368                         {
369                                 if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)]))
370                                         ereport(ERROR,
371                                                         (errcode(ERRCODE_SYNTAX_ERROR),
372                                                          errmsg("syntax error")));
373                                 WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 0);
374                         }
375                         else if (isspace((unsigned char) *(state->prsbuf)) ||
376                                          *(state->prsbuf) == '\0')
377                                 return 1;
378                         else if (!isdigit((unsigned char) *(state->prsbuf)))
379                                 ereport(ERROR,
380                                                 (errcode(ERRCODE_SYNTAX_ERROR),
381                                                  errmsg("syntax error")));
382                 }
383                 else
384                         /* internal error */
385                         elog(ERROR, "internal error");
386                 state->prsbuf++;
387         }
388
389         return 0;
390 }
391
392 Datum
393 tsvector_in(PG_FUNCTION_ARGS)
394 {
395         char       *buf = PG_GETARG_CSTRING(0);
396         TI_IN_STATE state;
397         WordEntryIN *arr;
398         WordEntry  *inarr;
399         int4            len = 0,
400                                 totallen = 64;
401         tsvector   *in;
402         char       *tmpbuf,
403                            *cur;
404         int4            i,
405                                 buflen = 256;
406
407         SET_FUNCOID();
408         state.prsbuf = buf;
409         state.len = 32;
410         state.word = (char *) palloc(state.len);
411         state.oprisdelim = false;
412
413         arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * totallen);
414         cur = tmpbuf = (char *) palloc(buflen);
415         while (gettoken_tsvector(&state))
416         {
417                 if (len >= totallen)
418                 {
419                         totallen *= 2;
420                         arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * totallen);
421                 }
422                 while ((cur - tmpbuf) + (state.curpos - state.word) >= buflen)
423                 {
424                         int4            dist = cur - tmpbuf;
425
426                         buflen *= 2;
427                         tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
428                         cur = tmpbuf + dist;
429                 }
430                 if (state.curpos - state.word >= MAXSTRLEN)
431                         ereport(ERROR,
432                                         (errcode(ERRCODE_SYNTAX_ERROR),
433                                          errmsg("word is too long")));
434                 arr[len].entry.len = state.curpos - state.word;
435                 if (cur - tmpbuf > MAXSTRPOS)
436                         ereport(ERROR,
437                                         (errcode(ERRCODE_SYNTAX_ERROR),
438                                          errmsg("too long value")));
439                 arr[len].entry.pos = cur - tmpbuf;
440                 memcpy((void *) cur, (void *) state.word, arr[len].entry.len);
441                 cur += arr[len].entry.len;
442                 if (state.alen)
443                 {
444                         arr[len].entry.haspos = 1;
445                         arr[len].pos = state.pos;
446                 }
447                 else
448                         arr[len].entry.haspos = 0;
449                 len++;
450         }
451         pfree(state.word);
452
453         if (len > 0)
454                 len = uniqueentry(arr, len, tmpbuf, &buflen);
455         else
456                 buflen = 0;
457         totallen = CALCDATASIZE(len, buflen);
458         in = (tsvector *) palloc(totallen);
459         memset(in, 0, totallen);
460         in->len = totallen;
461         in->size = len;
462         cur = STRPTR(in);
463         inarr = ARRPTR(in);
464         for (i = 0; i < len; i++)
465         {
466                 memcpy((void *) cur, (void *) &tmpbuf[arr[i].entry.pos], arr[i].entry.len);
467                 arr[i].entry.pos = cur - STRPTR(in);
468                 cur += SHORTALIGN(arr[i].entry.len);
469                 if (arr[i].entry.haspos)
470                 {
471                         memcpy(cur, arr[i].pos, (*(uint16 *) arr[i].pos + 1) * sizeof(WordEntryPos));
472                         cur += (*(uint16 *) arr[i].pos + 1) * sizeof(WordEntryPos);
473                         pfree(arr[i].pos);
474                 }
475                 memcpy(&(inarr[i]), &(arr[i].entry), sizeof(WordEntry));
476         }
477         pfree(tmpbuf);
478         pfree(arr);
479         PG_RETURN_POINTER(in);
480 }
481
482 Datum
483 tsvector_length(PG_FUNCTION_ARGS)
484 {
485         tsvector   *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
486         int4            ret = in->size;
487
488         PG_FREE_IF_COPY(in, 0);
489         PG_RETURN_INT32(ret);
490 }
491
492 Datum
493 tsvector_out(PG_FUNCTION_ARGS)
494 {
495         tsvector   *out = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
496         char       *outbuf;
497         int4            i,
498                                 j,
499                                 lenbuf = 0,
500                                 pp;
501         WordEntry  *ptr = ARRPTR(out);
502         char       *curin,
503                            *curout;
504
505         lenbuf = out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /* \0 */ ;
506         for (i = 0; i < out->size; i++)
507         {
508                 lenbuf += ptr[i].len * 2 /* for escape */ ;
509                 if (ptr[i].haspos)
510                         lenbuf += 7 * POSDATALEN(out, &(ptr[i]));
511         }
512
513         curout = outbuf = (char *) palloc(lenbuf);
514         for (i = 0; i < out->size; i++)
515         {
516                 curin = STRPTR(out) + ptr->pos;
517                 if (i != 0)
518                         *curout++ = ' ';
519                 *curout++ = '\'';
520                 j = ptr->len;
521                 while (j--)
522                 {
523                         if (*curin == '\'')
524                         {
525                                 int4            pos = curout - outbuf;
526
527                                 outbuf = (char *) repalloc((void *) outbuf, ++lenbuf);
528                                 curout = outbuf + pos;
529                                 *curout++ = '\\';
530                         }
531                         *curout++ = *curin++;
532                 }
533                 *curout++ = '\'';
534                 if ((pp = POSDATALEN(out, ptr)) != 0)
535                 {
536                         WordEntryPos *wptr;
537
538                         *curout++ = ':';
539                         wptr = POSDATAPTR(out, ptr);
540                         while (pp)
541                         {
542                                 sprintf(curout, "%d", WEP_GETPOS(*wptr));
543                                 curout = strchr(curout, '\0');
544                                 switch (WEP_GETWEIGHT(*wptr))
545                                 {
546                                         case 3:
547                                                 *curout++ = 'A';
548                                                 break;
549                                         case 2:
550                                                 *curout++ = 'B';
551                                                 break;
552                                         case 1:
553                                                 *curout++ = 'C';
554                                                 break;
555                                         case 0:
556                                         default:
557                                                 break;
558                                 }
559                                 if (pp > 1)
560                                         *curout++ = ',';
561                                 pp--;
562                                 wptr++;
563                         }
564                 }
565                 ptr++;
566         }
567         *curout = '\0';
568         outbuf[lenbuf - 1] = '\0';
569         PG_FREE_IF_COPY(out, 0);
570         PG_RETURN_POINTER(outbuf);
571 }
572
573 static int
574 compareWORD(const void *a, const void *b)
575 {
576         if (((TSWORD *) a)->len == ((TSWORD *) b)->len)
577         {
578                 int                     res = strncmp(
579                                                                   ((TSWORD *) a)->word,
580                                                                   ((TSWORD *) b)->word,
581                                                                   ((TSWORD *) b)->len);
582
583                 if (res == 0)
584                         return (((TSWORD *) a)->pos.pos > ((TSWORD *) b)->pos.pos) ? 1 : -1;
585                 return res;
586         }
587         return (((TSWORD *) a)->len > ((TSWORD *) b)->len) ? 1 : -1;
588 }
589
590 static int
591 uniqueWORD(TSWORD * a, int4 l)
592 {
593         TSWORD     *ptr,
594                            *res;
595         int                     tmppos;
596
597         if (l == 1)
598         {
599                 tmppos = LIMITPOS(a->pos.pos);
600                 a->alen = 2;
601                 a->pos.apos = (uint16 *) palloc(sizeof(uint16) * a->alen);
602                 a->pos.apos[0] = 1;
603                 a->pos.apos[1] = tmppos;
604                 return l;
605         }
606
607         res = a;
608         ptr = a + 1;
609
610         qsort((void *) a, l, sizeof(TSWORD), compareWORD);
611         tmppos = LIMITPOS(a->pos.pos);
612         a->alen = 2;
613         a->pos.apos = (uint16 *) palloc(sizeof(uint16) * a->alen);
614         a->pos.apos[0] = 1;
615         a->pos.apos[1] = tmppos;
616
617         while (ptr - a < l)
618         {
619                 if (!(ptr->len == res->len &&
620                           strncmp(ptr->word, res->word, res->len) == 0))
621                 {
622                         res++;
623                         res->len = ptr->len;
624                         res->word = ptr->word;
625                         tmppos = LIMITPOS(ptr->pos.pos);
626                         res->alen = 2;
627                         res->pos.apos = (uint16 *) palloc(sizeof(uint16) * res->alen);
628                         res->pos.apos[0] = 1;
629                         res->pos.apos[1] = tmppos;
630                 }
631                 else
632                 {
633                         pfree(ptr->word);
634                         if (res->pos.apos[0] < MAXNUMPOS - 1 && res->pos.apos[res->pos.apos[0]] != MAXENTRYPOS - 1)
635                         {
636                                 if (res->pos.apos[0] + 1 >= res->alen)
637                                 {
638                                         res->alen *= 2;
639                                         res->pos.apos = (uint16 *) repalloc(res->pos.apos, sizeof(uint16) * res->alen);
640                                 }
641                                 if (res->pos.apos[0] == 0 || res->pos.apos[res->pos.apos[0]] != LIMITPOS(ptr->pos.pos))
642                                 {
643                                         res->pos.apos[res->pos.apos[0] + 1] = LIMITPOS(ptr->pos.pos);
644                                         res->pos.apos[0]++;
645                                 }
646                         }
647                 }
648                 ptr++;
649         }
650
651         return res + 1 - a;
652 }
653
654 /*
655  * make value of tsvector
656  */
657 static tsvector *
658 makevalue(PRSTEXT * prs)
659 {
660         int4            i,
661                                 j,
662                                 lenstr = 0,
663                                 totallen;
664         tsvector   *in;
665         WordEntry  *ptr;
666         char       *str,
667                            *cur;
668
669         prs->curwords = uniqueWORD(prs->words, prs->curwords);
670         for (i = 0; i < prs->curwords; i++)
671         {
672                 lenstr += SHORTALIGN(prs->words[i].len);
673
674                 if (prs->words[i].alen)
675                         lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
676         }
677
678         totallen = CALCDATASIZE(prs->curwords, lenstr);
679         in = (tsvector *) palloc(totallen);
680         memset(in, 0, totallen);
681         in->len = totallen;
682         in->size = prs->curwords;
683
684         ptr = ARRPTR(in);
685         cur = str = STRPTR(in);
686         for (i = 0; i < prs->curwords; i++)
687         {
688                 ptr->len = prs->words[i].len;
689                 if (cur - str > MAXSTRPOS)
690                         ereport(ERROR,
691                                         (errcode(ERRCODE_SYNTAX_ERROR),
692                                          errmsg("value is too big")));
693                 ptr->pos = cur - str;
694                 memcpy((void *) cur, (void *) prs->words[i].word, prs->words[i].len);
695                 pfree(prs->words[i].word);
696                 cur += SHORTALIGN(prs->words[i].len);
697                 if (prs->words[i].alen)
698                 {
699                         WordEntryPos *wptr;
700
701                         ptr->haspos = 1;
702                         *(uint16 *) cur = prs->words[i].pos.apos[0];
703                         wptr = POSDATAPTR(in, ptr);
704                         for (j = 0; j < *(uint16 *) cur; j++)
705                         {
706                                 WEP_SETWEIGHT(wptr[j], 0);
707                                 WEP_SETPOS(wptr[j], prs->words[i].pos.apos[j + 1]);
708                         }
709                         cur += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
710                         pfree(prs->words[i].pos.apos);
711                 }
712                 else
713                         ptr->haspos = 0;
714                 ptr++;
715         }
716         pfree(prs->words);
717         return in;
718 }
719
720
721 Datum
722 to_tsvector(PG_FUNCTION_ARGS)
723 {
724         text       *in = PG_GETARG_TEXT_P(1);
725         PRSTEXT         prs;
726         tsvector   *out = NULL;
727         TSCfgInfo  *cfg;
728
729         SET_FUNCOID();
730         cfg = findcfg(PG_GETARG_INT32(0));
731
732         prs.lenwords = 32;
733         prs.curwords = 0;
734         prs.pos = 0;
735         prs.words = (TSWORD *) palloc(sizeof(TSWORD) * prs.lenwords);
736
737         parsetext_v2(cfg, &prs, VARDATA(in), VARSIZE(in) - VARHDRSZ);
738         PG_FREE_IF_COPY(in, 1);
739
740         if (prs.curwords)
741                 out = makevalue(&prs);
742         else
743         {
744                 pfree(prs.words);
745                 out = palloc(CALCDATASIZE(0, 0));
746                 out->len = CALCDATASIZE(0, 0);
747                 out->size = 0;
748         }
749         PG_RETURN_POINTER(out);
750 }
751
752 Datum
753 to_tsvector_name(PG_FUNCTION_ARGS)
754 {
755         text       *cfg = PG_GETARG_TEXT_P(0);
756         Datum           res;
757
758         SET_FUNCOID();
759         res = DirectFunctionCall3(
760                                                           to_tsvector,
761                                                           Int32GetDatum(name2id_cfg(cfg)),
762                                                           PG_GETARG_DATUM(1),
763                                                           (Datum) 0
764                 );
765
766         PG_FREE_IF_COPY(cfg, 0);
767         PG_RETURN_DATUM(res);
768 }
769
770 Datum
771 to_tsvector_current(PG_FUNCTION_ARGS)
772 {
773         Datum           res;
774
775         SET_FUNCOID();
776         res = DirectFunctionCall3(
777                                                           to_tsvector,
778                                                           Int32GetDatum(get_currcfg()),
779                                                           PG_GETARG_DATUM(0),
780                                                           (Datum) 0
781                 );
782
783         PG_RETURN_DATUM(res);
784 }
785
786 static Oid
787 findFunc(char *fname)
788 {
789         FuncCandidateList clist,
790                                 ptr;
791         Oid                     funcid = InvalidOid;
792         List       *names = list_make1(makeString(fname));
793
794         ptr = clist = FuncnameGetCandidates(names, 1);
795         list_free(names);
796
797         if (!ptr)
798                 return funcid;
799
800         while (ptr)
801         {
802                 if (ptr->args[0] == TEXTOID && funcid == InvalidOid)
803                         funcid = ptr->oid;
804                 clist = ptr->next;
805                 pfree(ptr);
806                 ptr = clist;
807         }
808
809         return funcid;
810 }
811
812 /*
813  * Trigger
814  */
815 Datum
816 tsearch2(PG_FUNCTION_ARGS)
817 {
818         TriggerData *trigdata;
819         Trigger    *trigger;
820         Relation        rel;
821         HeapTuple       rettuple = NULL;
822         int                     numidxattr,
823                                 i;
824         PRSTEXT         prs;
825         Datum           datum = (Datum) 0;
826         Oid                     funcoid = InvalidOid;
827         TSCfgInfo  *cfg;
828
829         SET_FUNCOID();
830         cfg = findcfg(get_currcfg());
831
832         if (!CALLED_AS_TRIGGER(fcinfo))
833                 /* internal error */
834                 elog(ERROR, "TSearch: Not fired by trigger manager");
835
836         trigdata = (TriggerData *) fcinfo->context;
837         if (TRIGGER_FIRED_FOR_STATEMENT(trigdata->tg_event))
838                 /* internal error */
839                 elog(ERROR, "TSearch: Can't process STATEMENT events");
840         if (TRIGGER_FIRED_AFTER(trigdata->tg_event))
841                 /* internal error */
842                 elog(ERROR, "TSearch: Must be fired BEFORE event");
843
844         if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
845                 rettuple = trigdata->tg_trigtuple;
846         else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
847                 rettuple = trigdata->tg_newtuple;
848         else
849                 /* internal error */
850                 elog(ERROR, "TSearch: Unknown event");
851
852         trigger = trigdata->tg_trigger;
853         rel = trigdata->tg_relation;
854
855         if (trigger->tgnargs < 2)
856                 /* internal error */
857                 elog(ERROR, "TSearch: format tsearch2(tsvector_field, text_field1,...)");
858
859         numidxattr = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
860         if (numidxattr == SPI_ERROR_NOATTRIBUTE)
861                 ereport(ERROR,
862                                 (errcode(ERRCODE_UNDEFINED_COLUMN),
863                                  errmsg("tsvector column \"%s\" does not exist",
864                                                 trigger->tgargs[0])));
865
866         prs.lenwords = 32;
867         prs.curwords = 0;
868         prs.pos = 0;
869         prs.words = (TSWORD *) palloc(sizeof(TSWORD) * prs.lenwords);
870
871         /* find all words in indexable column */
872         for (i = 1; i < trigger->tgnargs; i++)
873         {
874                 int                     numattr;
875                 Oid                     oidtype;
876                 Datum           txt_toasted;
877                 bool            isnull;
878                 text       *txt;
879
880                 numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
881                 if (numattr == SPI_ERROR_NOATTRIBUTE)
882                 {
883                         funcoid = findFunc(trigger->tgargs[i]);
884                         if (funcoid == InvalidOid)
885                                 ereport(ERROR,
886                                                 (errcode(ERRCODE_UNDEFINED_COLUMN),
887                                                  errmsg("could not find function or field \"%s\"",
888                                                                 trigger->tgargs[i])));
889
890                         continue;
891                 }
892                 oidtype = SPI_gettypeid(rel->rd_att, numattr);
893                 /* We assume char() and varchar() are binary-equivalent to text */
894                 if (!(oidtype == TEXTOID ||
895                           oidtype == VARCHAROID ||
896                           oidtype == BPCHAROID))
897                 {
898                         elog(WARNING, "TSearch: '%s' is not of character type",
899                                  trigger->tgargs[i]);
900                         continue;
901                 }
902                 txt_toasted = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
903                 if (isnull)
904                         continue;
905
906                 if (funcoid != InvalidOid)
907                 {
908                         text       *txttmp = (text *) DatumGetPointer(OidFunctionCall1(
909                                                                                                                                          funcoid,
910                                                                                                  PointerGetDatum(txt_toasted)
911                                                                                                                                                    ));
912
913                         txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txttmp)));
914                         if (txt == txttmp)
915                                 txt_toasted = PointerGetDatum(txt);
916                 }
917                 else
918                         txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txt_toasted)));
919
920                 parsetext_v2(cfg, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ);
921                 if (txt != (text *) DatumGetPointer(txt_toasted))
922                         pfree(txt);
923         }
924
925         /* make tsvector value */
926         if (prs.curwords)
927         {
928                 datum = PointerGetDatum(makevalue(&prs));
929                 rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
930                                                                    &datum, NULL);
931                 pfree(DatumGetPointer(datum));
932         }
933         else
934         {
935                 tsvector   *out = palloc(CALCDATASIZE(0, 0));
936
937                 out->len = CALCDATASIZE(0, 0);
938                 out->size = 0;
939                 datum = PointerGetDatum(out);
940                 pfree(prs.words);
941                 rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
942                                                                    &datum, NULL);
943         }
944
945         if (rettuple == NULL)
946                 /* internal error */
947                 elog(ERROR, "TSearch: %d returned by SPI_modifytuple", SPI_result);
948
949         return PointerGetDatum(rettuple);
950 }
951
952 static int
953 silly_cmp_tsvector(const tsvector * a, const tsvector * b)
954 {
955         if (a->len < b->len)
956                 return -1;
957         else if (a->len > b->len)
958                 return 1;
959         else if (a->size < b->size)
960                 return -1;
961         else if (a->size > b->size)
962                 return 1;
963         else
964         {
965                 unsigned char *aptr = (unsigned char *) (a->data) + DATAHDRSIZE;
966                 unsigned char *bptr = (unsigned char *) (b->data) + DATAHDRSIZE;
967
968                 while (aptr - ((unsigned char *) (a->data)) < a->len)
969                 {
970                         if (*aptr != *bptr)
971                                 return (*aptr < *bptr) ? -1 : 1;
972                         aptr++;
973                         bptr++;
974                 }
975         }
976         return 0;
977 }
978
979 PG_FUNCTION_INFO_V1(tsvector_cmp);
980 PG_FUNCTION_INFO_V1(tsvector_lt);
981 PG_FUNCTION_INFO_V1(tsvector_le);
982 PG_FUNCTION_INFO_V1(tsvector_eq);
983 PG_FUNCTION_INFO_V1(tsvector_ne);
984 PG_FUNCTION_INFO_V1(tsvector_ge);
985 PG_FUNCTION_INFO_V1(tsvector_gt);
986 Datum           tsvector_cmp(PG_FUNCTION_ARGS);
987 Datum           tsvector_lt(PG_FUNCTION_ARGS);
988 Datum           tsvector_le(PG_FUNCTION_ARGS);
989 Datum           tsvector_eq(PG_FUNCTION_ARGS);
990 Datum           tsvector_ne(PG_FUNCTION_ARGS);
991 Datum           tsvector_ge(PG_FUNCTION_ARGS);
992 Datum           tsvector_gt(PG_FUNCTION_ARGS);
993
/*
 * RUNCMP - shared prologue of the tsvector comparison functions.
 *
 * Declares "a" and "b" (arguments 0 and 1, detoasted) and "res" (the result
 * of silly_cmp_tsvector(a, b)), then frees any detoasted copies.  Because it
 * declares variables it must be the first thing in the function body, and
 * it relies on the caller's fcinfo.  It is used without a trailing
 * semicolon.
 */
#define RUNCMP \
	tsvector   *a = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0))); \
	tsvector   *b = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(1))); \
	int			res = silly_cmp_tsvector(a, b); \
	PG_FREE_IF_COPY(a, 0); \
	PG_FREE_IF_COPY(b, 1);
1000
1001 Datum
1002 tsvector_cmp(PG_FUNCTION_ARGS)
1003 {
1004         RUNCMP
1005                 PG_RETURN_INT32(res);
1006 }
1007
1008 Datum
1009 tsvector_lt(PG_FUNCTION_ARGS)
1010 {
1011         RUNCMP
1012                 PG_RETURN_BOOL((res < 0) ? true : false);
1013 }
1014
1015 Datum
1016 tsvector_le(PG_FUNCTION_ARGS)
1017 {
1018         RUNCMP
1019                 PG_RETURN_BOOL((res <= 0) ? true : false);
1020 }
1021
1022 Datum
1023 tsvector_eq(PG_FUNCTION_ARGS)
1024 {
1025         RUNCMP
1026                 PG_RETURN_BOOL((res == 0) ? true : false);
1027 }
1028
1029 Datum
1030 tsvector_ge(PG_FUNCTION_ARGS)
1031 {
1032         RUNCMP
1033                 PG_RETURN_BOOL((res >= 0) ? true : false);
1034 }
1035
1036 Datum
1037 tsvector_gt(PG_FUNCTION_ARGS)
1038 {
1039         RUNCMP
1040                 PG_RETURN_BOOL((res > 0) ? true : false);
1041 }
1042
1043 Datum
1044 tsvector_ne(PG_FUNCTION_ARGS)
1045 {
1046         RUNCMP
1047                 PG_RETURN_BOOL((res != 0) ? true : false);
1048 }