OSDN Git Service

Use the new List API function names throughout the backend, and disable the
[pg-rex/syncrep.git] / contrib / tsearch2 / tsvector.c
1 /*
2  * In/Out definitions for tsvector type
3  * Internal structure:
4  * string of values, array of lexeme positions in the string and their lengths
5  * Teodor Sigaev <teodor@sigaev.ru>
6  */
7 #include "postgres.h"
8
9 #include "access/gist.h"
10 #include "access/itup.h"
11 #include "utils/builtins.h"
12 #include "storage/bufpage.h"
13 #include "executor/spi.h"
14 #include "commands/trigger.h"
15 #include "nodes/pg_list.h"
16 #include "catalog/namespace.h"
17
18 #include "utils/pg_locale.h"
19
20 #include <ctype.h>                              /* tolower */
21 #include "tsvector.h"
22 #include "query.h"
23 #include "ts_cfg.h"
24 #include "common.h"
25
26 PG_FUNCTION_INFO_V1(tsvector_in);
27 Datum           tsvector_in(PG_FUNCTION_ARGS);
28
29 PG_FUNCTION_INFO_V1(tsvector_out);
30 Datum           tsvector_out(PG_FUNCTION_ARGS);
31
32 PG_FUNCTION_INFO_V1(to_tsvector);
33 Datum           to_tsvector(PG_FUNCTION_ARGS);
34
35 PG_FUNCTION_INFO_V1(to_tsvector_current);
36 Datum           to_tsvector_current(PG_FUNCTION_ARGS);
37
38 PG_FUNCTION_INFO_V1(to_tsvector_name);
39 Datum           to_tsvector_name(PG_FUNCTION_ARGS);
40
41 PG_FUNCTION_INFO_V1(tsearch2);
42 Datum           tsearch2(PG_FUNCTION_ARGS);
43
44 PG_FUNCTION_INFO_V1(tsvector_length);
45 Datum           tsvector_length(PG_FUNCTION_ARGS);
46
47 /*
48  * in/out text index type
49  */
50 static int
51 comparePos(const void *a, const void *b)
52 {
53         if (((WordEntryPos *) a)->pos == ((WordEntryPos *) b)->pos)
54                 return 1;
55         return (((WordEntryPos *) a)->pos > ((WordEntryPos *) b)->pos) ? 1 : -1;
56 }
57
58 static int
59 uniquePos(WordEntryPos * a, int4 l)
60 {
61         WordEntryPos *ptr,
62                            *res;
63
64         res = a;
65         if (l == 1)
66                 return l;
67
68         qsort((void *) a, l, sizeof(WordEntryPos), comparePos);
69
70         ptr = a + 1;
71         while (ptr - a < l)
72         {
73                 if (ptr->pos != res->pos)
74                 {
75                         res++;
76                         res->pos = ptr->pos;
77                         res->weight = ptr->weight;
78                         if (res - a >= MAXNUMPOS - 1 || res->pos == MAXENTRYPOS - 1)
79                                 break;
80                 }
81                 else if (ptr->weight > res->weight)
82                         res->weight = ptr->weight;
83                 ptr++;
84         }
85         return res + 1 - a;
86 }
87
88 static char *BufferStr;
89 static int
90 compareentry(const void *a, const void *b)
91 {
92         if (((WordEntryIN *) a)->entry.len == ((WordEntryIN *) b)->entry.len)
93         {
94                 return strncmp(
95                                            &BufferStr[((WordEntryIN *) a)->entry.pos],
96                                            &BufferStr[((WordEntryIN *) b)->entry.pos],
97                                            ((WordEntryIN *) a)->entry.len);
98         }
99         return (((WordEntryIN *) a)->entry.len > ((WordEntryIN *) b)->entry.len) ? 1 : -1;
100 }
101
102 static int
103 uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
104 {
105         WordEntryIN *ptr,
106                            *res;
107
108         res = a;
109         if (l == 1)
110         {
111                 if (a->entry.haspos)
112                 {
113                         *(uint16 *) (a->pos) = uniquePos(&(a->pos[1]), *(uint16 *) (a->pos));
114                         *outbuflen = SHORTALIGN(res->entry.len) + (*(uint16 *) (a->pos) + 1) * sizeof(WordEntryPos);
115                 }
116                 return l;
117         }
118
119         ptr = a + 1;
120         BufferStr = buf;
121         qsort((void *) a, l, sizeof(WordEntryIN), compareentry);
122
123         while (ptr - a < l)
124         {
125                 if (!(ptr->entry.len == res->entry.len &&
126                           strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos], res->entry.len) == 0))
127                 {
128                         if (res->entry.haspos)
129                         {
130                                 *(uint16 *) (res->pos) = uniquePos(&(res->pos[1]), *(uint16 *) (res->pos));
131                                 *outbuflen += *(uint16 *) (res->pos) * sizeof(WordEntryPos);
132                         }
133                         *outbuflen += SHORTALIGN(res->entry.len);
134                         res++;
135                         memcpy(res, ptr, sizeof(WordEntryIN));
136                 }
137                 else if (ptr->entry.haspos)
138                 {
139                         if (res->entry.haspos)
140                         {
141                                 int4            len = *(uint16 *) (ptr->pos) + 1 + *(uint16 *) (res->pos);
142
143                                 res->pos = (WordEntryPos *) repalloc(res->pos, len * sizeof(WordEntryPos));
144                                 memcpy(&(res->pos[*(uint16 *) (res->pos) + 1]),
145                                            &(ptr->pos[1]), *(uint16 *) (ptr->pos) * sizeof(WordEntryPos));
146                                 *(uint16 *) (res->pos) += *(uint16 *) (ptr->pos);
147                                 pfree(ptr->pos);
148                         }
149                         else
150                         {
151                                 res->entry.haspos = 1;
152                                 res->pos = ptr->pos;
153                         }
154                 }
155                 ptr++;
156         }
157         if (res->entry.haspos)
158         {
159                 *(uint16 *) (res->pos) = uniquePos(&(res->pos[1]), *(uint16 *) (res->pos));
160                 *outbuflen += *(uint16 *) (res->pos) * sizeof(WordEntryPos);
161         }
162         *outbuflen += SHORTALIGN(res->entry.len);
163
164         return res + 1 - a;
165 }
166
/* Values stored in state->state by gettoken_tsvector() */
#define WAITWORD		1
#define WAITENDWORD		2
#define WAITNEXTCHAR	3
#define WAITENDCMPLX	4
#define WAITPOSINFO		5
#define INPOSINFO		6
#define WAITPOSDELIM	7

/*
 * Double state->word when fewer than two free bytes remain behind
 * state->curpos, keeping curpos at the same offset in the new buffer.
 * Expects a variable named "state" in scope.
 */
#define RESIZEPRSBUF \
do { \
	int4 used = state->curpos - state->word; \
	if ( used + 1 >= state->len ) \
	{ \
		state->len *= 2; \
		state->word = (char*)repalloc( (void*)state->word, state->len ); \
		state->curpos = state->word + used; \
	} \
} while (0)
185
186 int4
187 gettoken_tsvector(TI_IN_STATE * state)
188 {
189         int4            oldstate = 0;
190
191         state->curpos = state->word;
192         state->state = WAITWORD;
193         state->alen = 0;
194
195         while (1)
196         {
197                 if (state->state == WAITWORD)
198                 {
199                         if (*(state->prsbuf) == '\0')
200                                 return 0;
201                         else if (*(state->prsbuf) == '\'')
202                                 state->state = WAITENDCMPLX;
203                         else if (*(state->prsbuf) == '\\')
204                         {
205                                 state->state = WAITNEXTCHAR;
206                                 oldstate = WAITENDWORD;
207                         }
208                         else if (state->oprisdelim && ISOPERATOR(*(state->prsbuf)))
209                                 ereport(ERROR,
210                                                 (errcode(ERRCODE_SYNTAX_ERROR),
211                                                  errmsg("syntax error")));
212                         else if (*(state->prsbuf) != ' ')
213                         {
214                                 *(state->curpos) = *(state->prsbuf);
215                                 state->curpos++;
216                                 state->state = WAITENDWORD;
217                         }
218                 }
219                 else if (state->state == WAITNEXTCHAR)
220                 {
221                         if (*(state->prsbuf) == '\0')
222                                 ereport(ERROR,
223                                                 (errcode(ERRCODE_SYNTAX_ERROR),
224                                                  errmsg("there is no escaped character")));
225                         else
226                         {
227                                 RESIZEPRSBUF;
228                                 *(state->curpos) = *(state->prsbuf);
229                                 state->curpos++;
230                                 state->state = oldstate;
231                         }
232                 }
233                 else if (state->state == WAITENDWORD)
234                 {
235                         if (*(state->prsbuf) == '\\')
236                         {
237                                 state->state = WAITNEXTCHAR;
238                                 oldstate = WAITENDWORD;
239                         }
240                         else if (*(state->prsbuf) == ' ' || *(state->prsbuf) == '\0' ||
241                                          (state->oprisdelim && ISOPERATOR(*(state->prsbuf))))
242                         {
243                                 RESIZEPRSBUF;
244                                 if (state->curpos == state->word)
245                                         ereport(ERROR,
246                                                         (errcode(ERRCODE_SYNTAX_ERROR),
247                                                          errmsg("syntax error")));
248                                 *(state->curpos) = '\0';
249                                 return 1;
250                         }
251                         else if (*(state->prsbuf) == ':')
252                         {
253                                 if (state->curpos == state->word)
254                                         ereport(ERROR,
255                                                         (errcode(ERRCODE_SYNTAX_ERROR),
256                                                          errmsg("syntax error")));
257                                 *(state->curpos) = '\0';
258                                 if (state->oprisdelim)
259                                         return 1;
260                                 else
261                                         state->state = INPOSINFO;
262                         }
263                         else
264                         {
265                                 RESIZEPRSBUF;
266                                 *(state->curpos) = *(state->prsbuf);
267                                 state->curpos++;
268                         }
269                 }
270                 else if (state->state == WAITENDCMPLX)
271                 {
272                         if (*(state->prsbuf) == '\'')
273                         {
274                                 RESIZEPRSBUF;
275                                 *(state->curpos) = '\0';
276                                 if (state->curpos == state->word)
277                                         ereport(ERROR,
278                                                         (errcode(ERRCODE_SYNTAX_ERROR),
279                                                          errmsg("syntax error")));
280                                 if (state->oprisdelim)
281                                 {
282                                         state->prsbuf++;
283                                         return 1;
284                                 }
285                                 else
286                                         state->state = WAITPOSINFO;
287                         }
288                         else if (*(state->prsbuf) == '\\')
289                         {
290                                 state->state = WAITNEXTCHAR;
291                                 oldstate = WAITENDCMPLX;
292                         }
293                         else if (*(state->prsbuf) == '\0')
294                                 ereport(ERROR,
295                                                 (errcode(ERRCODE_SYNTAX_ERROR),
296                                                  errmsg("syntax error")));
297                         else
298                         {
299                                 RESIZEPRSBUF;
300                                 *(state->curpos) = *(state->prsbuf);
301                                 state->curpos++;
302                         }
303                 }
304                 else if (state->state == WAITPOSINFO)
305                 {
306                         if (*(state->prsbuf) == ':')
307                                 state->state = INPOSINFO;
308                         else
309                                 return 1;
310                 }
311                 else if (state->state == INPOSINFO)
312                 {
313                         if (isdigit((unsigned char) *(state->prsbuf)))
314                         {
315                                 if (state->alen == 0)
316                                 {
317                                         state->alen = 4;
318                                         state->pos = (WordEntryPos *) palloc(sizeof(WordEntryPos) * state->alen);
319                                         *(uint16 *) (state->pos) = 0;
320                                 }
321                                 else if (*(uint16 *) (state->pos) + 1 >= state->alen)
322                                 {
323                                         state->alen *= 2;
324                                         state->pos = (WordEntryPos *) repalloc(state->pos, sizeof(WordEntryPos) * state->alen);
325                                 }
326                                 (*(uint16 *) (state->pos))++;
327                                 state->pos[*(uint16 *) (state->pos)].pos = LIMITPOS(atoi(state->prsbuf));
328                                 if (state->pos[*(uint16 *) (state->pos)].pos == 0)
329                                         ereport(ERROR,
330                                                         (errcode(ERRCODE_SYNTAX_ERROR),
331                                                          errmsg("wrong position info")));
332                                 state->pos[*(uint16 *) (state->pos)].weight = 0;
333                                 state->state = WAITPOSDELIM;
334                         }
335                         else
336                                 ereport(ERROR,
337                                                 (errcode(ERRCODE_SYNTAX_ERROR),
338                                                  errmsg("syntax error")));
339                 }
340                 else if (state->state == WAITPOSDELIM)
341                 {
342                         if (*(state->prsbuf) == ',')
343                                 state->state = INPOSINFO;
344                         else if (tolower(*(state->prsbuf)) == 'a' || *(state->prsbuf) == '*')
345                         {
346                                 if (state->pos[*(uint16 *) (state->pos)].weight)
347                                         ereport(ERROR,
348                                                         (errcode(ERRCODE_SYNTAX_ERROR),
349                                                          errmsg("syntax error")));
350                                 state->pos[*(uint16 *) (state->pos)].weight = 3;
351                         }
352                         else if (tolower(*(state->prsbuf)) == 'b')
353                         {
354                                 if (state->pos[*(uint16 *) (state->pos)].weight)
355                                         ereport(ERROR,
356                                                         (errcode(ERRCODE_SYNTAX_ERROR),
357                                                          errmsg("syntax error")));
358                                 state->pos[*(uint16 *) (state->pos)].weight = 2;
359                         }
360                         else if (tolower(*(state->prsbuf)) == 'c')
361                         {
362                                 if (state->pos[*(uint16 *) (state->pos)].weight)
363                                         ereport(ERROR,
364                                                         (errcode(ERRCODE_SYNTAX_ERROR),
365                                                          errmsg("syntax error")));
366                                 state->pos[*(uint16 *) (state->pos)].weight = 1;
367                         }
368                         else if (tolower(*(state->prsbuf)) == 'd')
369                         {
370                                 if (state->pos[*(uint16 *) (state->pos)].weight)
371                                         ereport(ERROR,
372                                                         (errcode(ERRCODE_SYNTAX_ERROR),
373                                                          errmsg("syntax error")));
374                                 state->pos[*(uint16 *) (state->pos)].weight = 0;
375                         }
376                         else if (isspace((unsigned char) *(state->prsbuf)) ||
377                                          *(state->prsbuf) == '\0')
378                                 return 1;
379                         else if (!isdigit((unsigned char) *(state->prsbuf)))
380                                 ereport(ERROR,
381                                                 (errcode(ERRCODE_SYNTAX_ERROR),
382                                                  errmsg("syntax error")));
383                 }
384                 else
385                         /* internal error */
386                         elog(ERROR, "internal error");
387                 state->prsbuf++;
388         }
389
390         return 0;
391 }
392
393 Datum
394 tsvector_in(PG_FUNCTION_ARGS)
395 {
396         char       *buf = PG_GETARG_CSTRING(0);
397         TI_IN_STATE state;
398         WordEntryIN *arr;
399         WordEntry  *inarr;
400         int4            len = 0,
401                                 totallen = 64;
402         tsvector   *in;
403         char       *tmpbuf,
404                            *cur;
405         int4            i,
406                                 buflen = 256;
407
408         state.prsbuf = buf;
409         state.len = 32;
410         state.word = (char *) palloc(state.len);
411         state.oprisdelim = false;
412
413         arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * totallen);
414         cur = tmpbuf = (char *) palloc(buflen);
415         while (gettoken_tsvector(&state))
416         {
417                 if (len >= totallen)
418                 {
419                         totallen *= 2;
420                         arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * totallen);
421                 }
422                 while ((cur - tmpbuf) + (state.curpos - state.word) >= buflen)
423                 {
424                         int4            dist = cur - tmpbuf;
425
426                         buflen *= 2;
427                         tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
428                         cur = tmpbuf + dist;
429                 }
430                 if (state.curpos - state.word >= MAXSTRLEN)
431                         ereport(ERROR,
432                                         (errcode(ERRCODE_SYNTAX_ERROR),
433                                          errmsg("word is too long")));
434                 arr[len].entry.len = state.curpos - state.word;
435                 if (cur - tmpbuf > MAXSTRPOS)
436                         ereport(ERROR,
437                                         (errcode(ERRCODE_SYNTAX_ERROR),
438                                          errmsg("too long value")));
439                 arr[len].entry.pos = cur - tmpbuf;
440                 memcpy((void *) cur, (void *) state.word, arr[len].entry.len);
441                 cur += arr[len].entry.len;
442                 if (state.alen)
443                 {
444                         arr[len].entry.haspos = 1;
445                         arr[len].pos = state.pos;
446                 }
447                 else
448                         arr[len].entry.haspos = 0;
449                 len++;
450         }
451         pfree(state.word);
452
453         if (len > 0)
454                 len = uniqueentry(arr, len, tmpbuf, &buflen);
455         else
456                 buflen=0;
457         totallen = CALCDATASIZE(len, buflen);
458         in = (tsvector *) palloc(totallen);
459         memset(in, 0, totallen);
460         in->len = totallen;
461         in->size = len;
462         cur = STRPTR(in);
463         inarr = ARRPTR(in);
464         for (i = 0; i < len; i++)
465         {
466                 memcpy((void *) cur, (void *) &tmpbuf[arr[i].entry.pos], arr[i].entry.len);
467                 arr[i].entry.pos = cur - STRPTR(in);
468                 cur += SHORTALIGN(arr[i].entry.len);
469                 if (arr[i].entry.haspos)
470                 {
471                         memcpy(cur, arr[i].pos, (*(uint16 *) arr[i].pos + 1) * sizeof(WordEntryPos));
472                         cur += (*(uint16 *) arr[i].pos + 1) * sizeof(WordEntryPos);
473                         pfree(arr[i].pos);
474                 }
475                 memcpy(&(inarr[i]), &(arr[i].entry), sizeof(WordEntry));
476         }
477         pfree(tmpbuf);
478         pfree(arr);
479         PG_RETURN_POINTER(in);
480 }
481
482 Datum
483 tsvector_length(PG_FUNCTION_ARGS)
484 {
485         tsvector   *in = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
486         int4            ret = in->size;
487
488         PG_FREE_IF_COPY(in, 0);
489         PG_RETURN_INT32(ret);
490 }
491
492 Datum
493 tsvector_out(PG_FUNCTION_ARGS)
494 {
495         tsvector   *out = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
496         char       *outbuf;
497         int4            i,
498                                 j,
499                                 lenbuf = 0,
500                                 pp;
501         WordEntry  *ptr = ARRPTR(out);
502         char       *curin,
503                            *curout;
504
505         lenbuf = out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /* \0 */ ;
506         for (i = 0; i < out->size; i++)
507         {
508                 lenbuf += ptr[i].len * 2 /* for escape */ ;
509                 if (ptr[i].haspos)
510                         lenbuf += 7 * POSDATALEN(out, &(ptr[i]));
511         }
512
513         curout = outbuf = (char *) palloc(lenbuf);
514         for (i = 0; i < out->size; i++)
515         {
516                 curin = STRPTR(out) + ptr->pos;
517                 if (i != 0)
518                         *curout++ = ' ';
519                 *curout++ = '\'';
520                 j = ptr->len;
521                 while (j--)
522                 {
523                         if (*curin == '\'')
524                         {
525                                 int4            pos = curout - outbuf;
526
527                                 outbuf = (char *) repalloc((void *) outbuf, ++lenbuf);
528                                 curout = outbuf + pos;
529                                 *curout++ = '\\';
530                         }
531                         *curout++ = *curin++;
532                 }
533                 *curout++ = '\'';
534                 if ((pp = POSDATALEN(out, ptr)) != 0)
535                 {
536                         WordEntryPos *wptr;
537
538                         *curout++ = ':';
539                         wptr = POSDATAPTR(out, ptr);
540                         while (pp)
541                         {
542                                 sprintf(curout, "%d", wptr->pos);
543                                 curout = strchr(curout, '\0');
544                                 switch (wptr->weight)
545                                 {
546                                         case 3:
547                                                 *curout++ = 'A';
548                                                 break;
549                                         case 2:
550                                                 *curout++ = 'B';
551                                                 break;
552                                         case 1:
553                                                 *curout++ = 'C';
554                                                 break;
555                                         case 0:
556                                         default:
557                                                 break;
558                                 }
559                                 if (pp > 1)
560                                         *curout++ = ',';
561                                 pp--;
562                                 wptr++;
563                         }
564                 }
565                 ptr++;
566         }
567         *curout = '\0';
568         outbuf[lenbuf - 1] = '\0';
569         PG_FREE_IF_COPY(out, 0);
570         PG_RETURN_POINTER(outbuf);
571 }
572
573 static int
574 compareWORD(const void *a, const void *b)
575 {
576         if (((WORD *) a)->len == ((WORD *) b)->len)
577         {
578                 int                     res = strncmp(
579                                                                   ((WORD *) a)->word,
580                                                                   ((WORD *) b)->word,
581                                                                   ((WORD *) b)->len);
582
583                 if (res == 0)
584                         return (((WORD *) a)->pos.pos > ((WORD *) b)->pos.pos) ? 1 : -1;
585                 return res;
586         }
587         return (((WORD *) a)->len > ((WORD *) b)->len) ? 1 : -1;
588 }
589
590 static int
591 uniqueWORD(WORD * a, int4 l)
592 {
593         WORD       *ptr,
594                            *res;
595         int                     tmppos;
596
597         if (l == 1)
598         {
599                 tmppos = LIMITPOS(a->pos.pos);
600                 a->alen = 2;
601                 a->pos.apos = (uint16 *) palloc(sizeof(uint16) * a->alen);
602                 a->pos.apos[0] = 1;
603                 a->pos.apos[1] = tmppos;
604                 return l;
605         }
606
607         res = a;
608         ptr = a + 1;
609
610         qsort((void *) a, l, sizeof(WORD), compareWORD);
611         tmppos = LIMITPOS(a->pos.pos);
612         a->alen = 2;
613         a->pos.apos = (uint16 *) palloc(sizeof(uint16) * a->alen);
614         a->pos.apos[0] = 1;
615         a->pos.apos[1] = tmppos;
616
617         while (ptr - a < l)
618         {
619                 if (!(ptr->len == res->len &&
620                           strncmp(ptr->word, res->word, res->len) == 0))
621                 {
622                         res++;
623                         res->len = ptr->len;
624                         res->word = ptr->word;
625                         tmppos = LIMITPOS(ptr->pos.pos);
626                         res->alen = 2;
627                         res->pos.apos = (uint16 *) palloc(sizeof(uint16) * res->alen);
628                         res->pos.apos[0] = 1;
629                         res->pos.apos[1] = tmppos;
630                 }
631                 else
632                 {
633                         pfree(ptr->word);
634                         if (res->pos.apos[0] < MAXNUMPOS - 1 && res->pos.apos[res->pos.apos[0]] != MAXENTRYPOS - 1)
635                         {
636                                 if (res->pos.apos[0] + 1 >= res->alen)
637                                 {
638                                         res->alen *= 2;
639                                         res->pos.apos = (uint16 *) repalloc(res->pos.apos, sizeof(uint16) * res->alen);
640                                 }
641                                 if ( res->pos.apos[0]==0 || res->pos.apos[res->pos.apos[0]] != LIMITPOS(ptr->pos.pos) ) { 
642                                         res->pos.apos[res->pos.apos[0] + 1] = LIMITPOS(ptr->pos.pos);
643                                         res->pos.apos[0]++;
644                                 }
645                         }
646                 }
647                 ptr++;
648         }
649
650         return res + 1 - a;
651 }
652
653 /*
654  * make value of tsvector
655  */
656 static tsvector *
657 makevalue(PRSTEXT * prs)
658 {
659         int4            i,
660                                 j,
661                                 lenstr = 0,
662                                 totallen;
663         tsvector   *in;
664         WordEntry  *ptr;
665         char       *str,
666                            *cur;
667
668         prs->curwords = uniqueWORD(prs->words, prs->curwords);
669         for (i = 0; i < prs->curwords; i++)
670         {
671                 lenstr += SHORTALIGN(prs->words[i].len);
672
673                 if (prs->words[i].alen)
674                         lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
675         }
676
677         totallen = CALCDATASIZE(prs->curwords, lenstr);
678         in = (tsvector *) palloc(totallen);
679         memset(in, 0, totallen);
680         in->len = totallen;
681         in->size = prs->curwords;
682
683         ptr = ARRPTR(in);
684         cur = str = STRPTR(in);
685         for (i = 0; i < prs->curwords; i++)
686         {
687                 ptr->len = prs->words[i].len;
688                 if (cur - str > MAXSTRPOS)
689                         ereport(ERROR,
690                                         (errcode(ERRCODE_SYNTAX_ERROR),
691                                          errmsg("value is too big")));
692                 ptr->pos = cur - str;
693                 memcpy((void *) cur, (void *) prs->words[i].word, prs->words[i].len);
694                 pfree(prs->words[i].word);
695                 cur += SHORTALIGN(prs->words[i].len);
696                 if (prs->words[i].alen)
697                 {
698                         WordEntryPos *wptr;
699
700                         ptr->haspos = 1;
701                         *(uint16 *) cur = prs->words[i].pos.apos[0];
702                         wptr = POSDATAPTR(in, ptr);
703                         for (j = 0; j < *(uint16 *) cur; j++)
704                         {
705                                 wptr[j].weight = 0;
706                                 wptr[j].pos = prs->words[i].pos.apos[j + 1];
707                         }
708                         cur += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
709                         pfree(prs->words[i].pos.apos);
710                 }
711                 else
712                         ptr->haspos = 0;
713                 ptr++;
714         }
715         pfree(prs->words);
716         return in;
717 }
718
719
720 Datum
721 to_tsvector(PG_FUNCTION_ARGS)
722 {
723         text       *in = PG_GETARG_TEXT_P(1);
724         PRSTEXT         prs;
725         tsvector   *out = NULL;
726         TSCfgInfo  *cfg = findcfg(PG_GETARG_INT32(0));
727
728         prs.lenwords = 32;
729         prs.curwords = 0;
730         prs.pos = 0;
731         prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
732
733         parsetext_v2(cfg, &prs, VARDATA(in), VARSIZE(in) - VARHDRSZ);
734         PG_FREE_IF_COPY(in, 1);
735
736         if (prs.curwords)
737                 out = makevalue(&prs);
738         else
739         {
740                 pfree(prs.words);
741                 out = palloc(CALCDATASIZE(0, 0));
742                 out->len = CALCDATASIZE(0, 0);
743                 out->size = 0;
744         }
745         PG_RETURN_POINTER(out);
746 }
747
748 Datum
749 to_tsvector_name(PG_FUNCTION_ARGS)
750 {
751         text       *cfg = PG_GETARG_TEXT_P(0);
752         Datum           res = DirectFunctionCall3(
753                                                                                   to_tsvector,
754                                                                                   Int32GetDatum(name2id_cfg(cfg)),
755                                                                                   PG_GETARG_DATUM(1),
756                                                                                   (Datum) 0
757         );
758
759         PG_FREE_IF_COPY(cfg, 0);
760         PG_RETURN_DATUM(res);
761 }
762
763 Datum
764 to_tsvector_current(PG_FUNCTION_ARGS)
765 {
766         Datum           res = DirectFunctionCall3(
767                                                                                   to_tsvector,
768                                                                                   Int32GetDatum(get_currcfg()),
769                                                                                   PG_GETARG_DATUM(0),
770                                                                                   (Datum) 0
771         );
772
773         PG_RETURN_DATUM(res);
774 }
775
776 static Oid
777 findFunc(char *fname)
778 {
779         FuncCandidateList clist,
780                                 ptr;
781         Oid                     funcid = InvalidOid;
782         List       *names = list_make1(makeString(fname));
783
784         ptr = clist = FuncnameGetCandidates(names, 1);
785         list_free(names);
786
787         if (!ptr)
788                 return funcid;
789
790         while (ptr)
791         {
792                 if (ptr->args[0] == TEXTOID && funcid == InvalidOid)
793                         funcid = ptr->oid;
794                 clist = ptr->next;
795                 pfree(ptr);
796                 ptr = clist;
797         }
798
799         return funcid;
800 }
801
802 /*
803  * Trigger
804  */
805 Datum
806 tsearch2(PG_FUNCTION_ARGS)
807 {
808         TriggerData *trigdata;
809         Trigger    *trigger;
810         Relation        rel;
811         HeapTuple       rettuple = NULL;
812         TSCfgInfo  *cfg = findcfg(get_currcfg());
813         int                     numidxattr,
814                                 i;
815         PRSTEXT         prs;
816         Datum           datum = (Datum) 0;
817         Oid                     funcoid = InvalidOid;
818
819         if (!CALLED_AS_TRIGGER(fcinfo))
820                 /* internal error */
821                 elog(ERROR, "TSearch: Not fired by trigger manager");
822
823         trigdata = (TriggerData *) fcinfo->context;
824         if (TRIGGER_FIRED_FOR_STATEMENT(trigdata->tg_event))
825                 /* internal error */
826                 elog(ERROR, "TSearch: Can't process STATEMENT events");
827         if (TRIGGER_FIRED_AFTER(trigdata->tg_event))
828                 /* internal error */
829                 elog(ERROR, "TSearch: Must be fired BEFORE event");
830
831         if (TRIGGER_FIRED_BY_INSERT(trigdata->tg_event))
832                 rettuple = trigdata->tg_trigtuple;
833         else if (TRIGGER_FIRED_BY_UPDATE(trigdata->tg_event))
834                 rettuple = trigdata->tg_newtuple;
835         else
836                 /* internal error */
837                 elog(ERROR, "TSearch: Unknown event");
838
839         trigger = trigdata->tg_trigger;
840         rel = trigdata->tg_relation;
841
842         if (trigger->tgnargs < 2)
843                 /* internal error */
844                 elog(ERROR, "TSearch: format tsearch2(tsvector_field, text_field1,...)");
845
846         numidxattr = SPI_fnumber(rel->rd_att, trigger->tgargs[0]);
847         if (numidxattr == SPI_ERROR_NOATTRIBUTE)
848                 ereport(ERROR,
849                                 (errcode(ERRCODE_UNDEFINED_COLUMN),
850                                  errmsg("tsvector column \"%s\" does not exist",
851                                                 trigger->tgargs[0])));
852
853         prs.lenwords = 32;
854         prs.curwords = 0;
855         prs.pos = 0;
856         prs.words = (WORD *) palloc(sizeof(WORD) * prs.lenwords);
857
858         /* find all words in indexable column */
859         for (i = 1; i < trigger->tgnargs; i++)
860         {
861                 int                     numattr;
862                 Oid                     oidtype;
863                 Datum           txt_toasted;
864                 bool            isnull;
865                 text       *txt;
866
867                 numattr = SPI_fnumber(rel->rd_att, trigger->tgargs[i]);
868                 if (numattr == SPI_ERROR_NOATTRIBUTE)
869                 {
870                         funcoid = findFunc(trigger->tgargs[i]);
871                         if (funcoid == InvalidOid)
872                                 ereport(ERROR,
873                                                 (errcode(ERRCODE_UNDEFINED_COLUMN),
874                                                  errmsg("could not find function or field \"%s\"",
875                                                                 trigger->tgargs[i])));
876
877                         continue;
878                 }
879                 oidtype = SPI_gettypeid(rel->rd_att, numattr);
880                 /* We assume char() and varchar() are binary-equivalent to text */
881                 if (!(oidtype == TEXTOID ||
882                           oidtype == VARCHAROID ||
883                           oidtype == BPCHAROID))
884                 {
885                         elog(WARNING, "TSearch: '%s' is not of character type",
886                                  trigger->tgargs[i]);
887                         continue;
888                 }
889                 txt_toasted = SPI_getbinval(rettuple, rel->rd_att, numattr, &isnull);
890                 if (isnull)
891                         continue;
892
893                 if (funcoid != InvalidOid)
894                 {
895                         text       *txttmp = (text *) DatumGetPointer(OidFunctionCall1(
896                                                                                                                                  funcoid,
897                                                                                          PointerGetDatum(txt_toasted)
898                                                                                                                                           ));
899
900                         txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txttmp)));
901                         if (txt == txttmp)
902                                 txt_toasted = PointerGetDatum(txt);
903                 }
904                 else
905                         txt = (text *) DatumGetPointer(PG_DETOAST_DATUM(PointerGetDatum(txt_toasted)));
906
907                 parsetext_v2(cfg, &prs, VARDATA(txt), VARSIZE(txt) - VARHDRSZ);
908                 if (txt != (text *) DatumGetPointer(txt_toasted))
909                         pfree(txt);
910         }
911
912         /* make tsvector value */
913         if (prs.curwords)
914         {
915                 datum = PointerGetDatum(makevalue(&prs));
916                 rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
917                                                                    &datum, NULL);
918                 pfree(DatumGetPointer(datum));
919         }
920         else
921         {
922                 tsvector   *out = palloc(CALCDATASIZE(0, 0));
923
924                 out->len = CALCDATASIZE(0, 0);
925                 out->size = 0;
926                 datum = PointerGetDatum(out);
927                 pfree(prs.words);
928                 rettuple = SPI_modifytuple(rel, rettuple, 1, &numidxattr,
929                                                                    &datum, NULL);
930         }
931
932         if (rettuple == NULL)
933                 /* internal error */
934                 elog(ERROR, "TSearch: %d returned by SPI_modifytuple", SPI_result);
935
936         return PointerGetDatum(rettuple);
937 }
938
939 static int
940 silly_cmp_tsvector(const tsvector *a, const tsvector *b) {
941         if ( a->len < b->len )  
942                 return -1;
943         else if ( a->len > b->len )
944                 return 1;
945         else if ( a->size < b->size ) 
946                 return -1;
947         else if ( a->size > b->size )
948                 return 1;
949         else {
950                 unsigned char *aptr=(unsigned char *)(a->data) + DATAHDRSIZE;
951                 unsigned char *bptr=(unsigned char *)(b->data) + DATAHDRSIZE;
952                 
953                 while( aptr - ( (unsigned char *)(a->data) ) < a->len ) {
954                         if ( *aptr != *bptr )
955                                 return ( *aptr < *bptr ) ? -1 : 1;
956                         aptr++; bptr++;
957                 } 
958         }
959         return 0;       
960 }
961
962 PG_FUNCTION_INFO_V1(tsvector_cmp);
963 PG_FUNCTION_INFO_V1(tsvector_lt);
964 PG_FUNCTION_INFO_V1(tsvector_le);
965 PG_FUNCTION_INFO_V1(tsvector_eq);
966 PG_FUNCTION_INFO_V1(tsvector_ne);
967 PG_FUNCTION_INFO_V1(tsvector_ge);
968 PG_FUNCTION_INFO_V1(tsvector_gt);
969 Datum           tsvector_cmp(PG_FUNCTION_ARGS);
970 Datum           tsvector_lt(PG_FUNCTION_ARGS);
971 Datum           tsvector_le(PG_FUNCTION_ARGS);
972 Datum           tsvector_eq(PG_FUNCTION_ARGS);
973 Datum           tsvector_ne(PG_FUNCTION_ARGS);
974 Datum           tsvector_ge(PG_FUNCTION_ARGS);
975 Datum           tsvector_gt(PG_FUNCTION_ARGS);
976
/*
 * RUNCMP: common prologue of the tsvector comparison functions.
 *
 * Detoasts arguments 0 and 1, stores the verdict of silly_cmp_tsvector()
 * in the local "res", then frees whichever arguments were copied by
 * detoasting.  The expansion begins with declarations, so it has to be
 * placed at the start of the function body.
 */
#define RUNCMP \
	tsvector   *lhs = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(0))); \
	tsvector   *rhs = (tsvector *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_DATUM(1))); \
	int			res = silly_cmp_tsvector(lhs, rhs); \
	PG_FREE_IF_COPY(lhs, 0); \
	PG_FREE_IF_COPY(rhs, 1);
983
984 Datum
985 tsvector_cmp(PG_FUNCTION_ARGS)   {
986         RUNCMP
987         PG_RETURN_INT32(res);
988 }
989
990 Datum
991 tsvector_lt(PG_FUNCTION_ARGS) {
992         RUNCMP
993         PG_RETURN_BOOL((res < 0) ? true : false);
994 }
995
996 Datum
997 tsvector_le(PG_FUNCTION_ARGS) {
998         RUNCMP
999         PG_RETURN_BOOL((res <= 0) ? true : false);
1000 }
1001
1002 Datum
1003 tsvector_eq(PG_FUNCTION_ARGS) {
1004         RUNCMP
1005         PG_RETURN_BOOL((res == 0) ? true : false);
1006 }
1007
1008 Datum
1009 tsvector_ge(PG_FUNCTION_ARGS) {
1010         RUNCMP
1011         PG_RETURN_BOOL((res >= 0) ? true : false);
1012 }
1013  
1014 Datum
1015 tsvector_gt(PG_FUNCTION_ARGS) {
1016         RUNCMP
1017         PG_RETURN_BOOL((res > 0) ? true : false);
1018 }               
1019  
1020 Datum
1021 tsvector_ne(PG_FUNCTION_ARGS) {   
1022         RUNCMP      
1023         PG_RETURN_BOOL((res != 0) ? true : false);
1024 }
1025