OSDN Git Service

Add missing header to silence compiler warnings.
[ludiafuncs/ludia_funcs.git] / ludia_funcs.c
1 /*-------------------------------------------------------------------------
2  *
3  * Copyright (c) 2006-2013, NTT DATA Corporation
4  * All rights reserved.
5  *
6  * Changelog:
7  *   2013/01/09
8  *   Update Ludia functions so that they are available with PostgreSQL9.1.
9  *   Author: NTT DATA Corporation
10  *
11  *-------------------------------------------------------------------------
12  */
13 #include "postgres.h"
14
15 #include <limits.h>
16 #include <sys/types.h>
17 #include <sys/stat.h>
18 #include <unistd.h>
19
20 #include "catalog/pg_type.h"
21 #include "fmgr.h"
22 #include "funcapi.h"
23 #include "ludia_funcs.h"
24 #include "mb/pg_wchar.h"
25 #include "senna.h"
26 #include "storage/fd.h"
27 #include "utils/builtins.h"
28 #include "utils/guc.h"
29 #include "miscadmin.h"
30
31 #if PG_VERSION_NUM >= 90300
32 #include "access/htup_details.h"
33 #endif
34
35 PG_MODULE_MAGIC;
36
37 /* Last update date of ludia_funcs */
38 #define PGS2_LAST_UPDATE        "2013.04.05"
39
40 /* GUC variables */
41 #ifdef PGS2_DEBUG
42 static bool     pgs2_enable_debug = false;
43 #endif
44 static char     *pgs2_last_update = NULL;
45 static int      norm_cache_limit = -1;
46 static bool     escape_snippet_keyword = false;
47
48 #define SEN_NORMALIZE_FLAGS 0
49 #define SEN_MAX_N_EXPRS         32
50
51 /* upper limit for GUC variables measured in kilobytes of memory */
52 /* note that various places assume the byte size fits in a "long" variable */
53 #if SIZEOF_SIZE_T > 4 && SIZEOF_LONG > 4
54 #define MAX_KILOBYTES   INT_MAX
55 #else
56 #define MAX_KILOBYTES   (INT_MAX / 1024)
57 #endif
58
59 #define ISBACKSLASHCHAR(x) (*(x) == '\\')
60 #define ISDOUBLEQUOTECHAR(x) (*(x) == '"')
61 #define ISSENNAOPSCHAR(x) (*(x) == '+' || *(x) == '-' || *(x) == ' ')
62
63 PG_FUNCTION_INFO_V1(pgs2snippet1);
64 Datum   pgs2snippet1(PG_FUNCTION_ARGS);
65 PG_FUNCTION_INFO_V1(pgs2norm);
66 Datum   pgs2norm(PG_FUNCTION_ARGS);
67 PG_FUNCTION_INFO_V1(pgs2textporter1);
68 Datum   pgs2textporter1(PG_FUNCTION_ARGS);
69 PG_FUNCTION_INFO_V1(pgs2seninfo);
70 Datum   pgs2seninfo(PG_FUNCTION_ARGS);
71
72 static sen_encoding     GetSennaEncoding(void);
73 static sen_query        *GetSennaQuery(char *str, size_t len);
74 static bool                     EscapeSnippetKeyword(char **s, size_t *slen);
75
#ifdef TEXTPORTER
/*
 * Fixed settings used by pgs2textporter1(): the directory that holds its
 * temporary output file and the constant arguments it passes to
 * ExecTextPorter().
 */
#define TEXTPORTER_TMPDIR			"/tmp"
#define TEXTPORTER_GROUPNAME		"UTF-8"
#define TEXTPORTER_DEFLANGNAME		"Japanese"
#define TEXTPORTER_BBIGENDIAN		1
#define TEXTPORTER_OPTION			0x00000020	/* DMC_GETTEXT_OPT_LF */
#define TEXTPORTER_OPTION_STRING	"32"
#define TEXTPORTER_OPTION1			0x00010000	/* DMC_GETTEXT_OPT1_TXCONV */
#define TEXTPORTER_SIZE				0
#define TEXTPORTER_CSV_C			0

/* GUC variables for pgs2textporter1 */
static int	textporter_error = ERROR;
static unsigned int	textporter_option = TEXTPORTER_OPTION;

/*
 * Dummy variable: it does nothing by itself except, in some cases,
 * supplying the value that SHOW displays.  The real state lives in
 * textporter_option and is kept in sync by the assign hook.
 */
static char	*textporter_option_string;

/* Message levels selectable through ludia_funcs.textporter_error */
static const struct config_enum_entry textporter_error_options[] = {
	{"debug1", DEBUG1, false},
	{"log", LOG, false},
	{"info", INFO, false},
	{"notice", NOTICE, false},
	{"warning", WARNING, false},
	{"error", ERROR, false},
	{NULL, 0, false}
};

static void CleanupTextPorterTmpFiles(void);

/* GUC hooks for ludia_funcs.textporter_option */
static bool check_textporter_option(char **newval, void **extra, GucSource source);
static void assign_textporter_option(const char *newval, void *extra);
#endif	/* TEXTPORTER */
113
114 void    _PG_init(void);
115 void    _PG_fini(void);
116
117 void
118 _PG_init(void)
119 {
120         sen_rc          rc;
121
122 #ifdef PGS2_DEBUG
123         /* Define custom GUC variable for debugging */
124         DefineCustomBoolVariable("ludia_funcs.enable_debug",
125                                                          "Emit ludia_funcs debugging output.",
126                                                          NULL,
127                                                          &pgs2_enable_debug,
128                                                          false,
129                                                          PGC_USERSET,
130                                                          0,
131                                                          NULL,
132                                                          NULL,
133                                                          NULL);
134 #endif
135
136         /* Can't be set in postgresql.conf */
137         DefineCustomStringVariable("ludia_funcs.last_update",
138                                                            "Shows the last update date of ludia_funcs.",
139                                                            NULL,
140                                                            &pgs2_last_update,
141                                                            PGS2_LAST_UPDATE,
142                                                            PGC_INTERNAL,
143                                                            GUC_REPORT | GUC_NOT_IN_SAMPLE | GUC_DISALLOW_IN_FILE,
144                                                            NULL,
145                                                            NULL,
146                                                            NULL);
147
148 #ifdef TEXTPORTER
149         /* Define custom GUC variables */
150         DefineCustomEnumVariable("ludia_funcs.textporter_error",
151                                                          "Sets the message levels that are emitted "
152                                                          "when textporter fails.",
153                                                          NULL,
154                                                          &textporter_error,
155                                                          ERROR,
156                                                          textporter_error_options,
157                                                          PGC_SUSET,
158                                                          0,
159                                                          NULL,
160                                                          NULL,
161                                                          NULL);
162
163         DefineCustomStringVariable("ludia_funcs.textporter_option",
164                                                            "Sets the option used to get text data "
165                                                            "from TextPorter",
166                                                            NULL,
167                                                            &textporter_option_string,
168                                                            TEXTPORTER_OPTION_STRING,
169                                                            PGC_SUSET,
170                                                            0,
171                                                            check_textporter_option,
172                                                            assign_textporter_option,
173                                                            NULL);
174
175         /* Clean up remaining textporter temporary files */
176         CleanupTextPorterTmpFiles();
177 #endif  /* TEXTPORTER */
178
179         /*
180          * A value of 0 means no limit on the cache size. A value of -1 means
181          * that work_mem is used as the upper size limit of the cache.
182          */
183         DefineCustomIntVariable("ludia_funcs.norm_cache_limit",
184                                                         "Sets the maximum memory to be used for caching "
185                                                         "the result of pgs2norm()",
186                                                         NULL,
187                                                         &norm_cache_limit,
188                                                         -1,
189                                                         -1,
190                                                         MAX_KILOBYTES,
191                                                         PGC_USERSET,
192                                                         GUC_UNIT_KB,
193                                                         NULL,
194                                                         NULL,
195                                                         NULL);
196
197         DefineCustomBoolVariable("ludia_funcs.escape_snippet_keyword",
198                                                          "Escapes snippet keyword string.",
199                                                          NULL,
200                                                          &escape_snippet_keyword,
201                                                          false,
202                                                          PGC_USERSET,
203                                                          0,
204                                                          NULL,
205                                                          NULL,
206                                                          NULL);
207
208         EmitWarningsOnPlaceholders("ludia_funcs");
209
210         /* Initialize Senna */
211         rc = sen_init();
212         if (rc != sen_success)
213                 ereport(ERROR,
214                                 (errmsg("sen_init() failed: %d", rc)));
215 }
216
/*
 * Module unload callback.  Intentionally empty: no teardown is performed.
 */
void
_PG_fini(void)
{
	/* nothing to do */
}
221
222 #ifdef TEXTPORTER
/*
 * Unlink the file named by 'path'.  A failure is reported as a WARNING
 * instead of an ERROR, so cleanup never aborts the caller.  Note that
 * 'path' is evaluated more than once.
 */
#define REMOVE_TMPFILE(path) \
	do { \
		if (unlink(path) != 0) \
			ereport(WARNING, \
					(errcode_for_file_access(), \
					 errmsg("could not remove temporary file \"%s\": %m", path))); \
	} while(0)
230
231 Datum
232 pgs2textporter1(PG_FUNCTION_ARGS)
233 {
234         char    *appfile = text_to_cstring(PG_GETARG_TEXT_P(0));
235         char    txtfile[] = TEXTPORTER_TMPDIR "/ludia_funcs_XXXXXX";
236         int             tmpfd;
237         int             ret;
238         FILE    *fp;
239         text    *result = NULL;
240         struct stat     statbuf;
241         bool    return_null = false;
242
243         /* Confirm that database encoding is UTF-8 */
244         GetSennaEncoding();
245
246         PG_TRY();
247         {
248                 /*
249                  * Generate a unique temporary filename where text data gotten
250                  * from application file by TextPorter is stored temporarily.
251                  */
252                 tmpfd = mkstemp(txtfile);
253                 if (tmpfd < 0)
254                         ereport(ERROR,
255                                         (errcode_for_file_access(),
256                                          errmsg("could not generate a unique temporary filename: %m")));
257                 if (close(tmpfd) != 0)
258                         ereport(ERROR,
259                                         (errcode_for_file_access(),
260                                          errmsg("could not close temporary file \"%s\": %m", txtfile)));
261
262                 /*
263                  * Run TextPorter to read text data from application file (appfile)
264                  * to temporary file (txtfile).
265                  */
266                 ret = ExecTextPorter((unsigned char *)appfile,
267                                                          (unsigned char *)txtfile,
268                                                          (unsigned char *)TEXTPORTER_GROUPNAME,
269                                                          (unsigned char *)TEXTPORTER_DEFLANGNAME,
270                                                          TEXTPORTER_BBIGENDIAN, textporter_option,
271                                                          TEXTPORTER_OPTION1, TEXTPORTER_SIZE,
272                                                          TEXTPORTER_CSV_C);
273                 if (ret != 0)
274                 {
275                         ereport(textporter_error,
276                                         (errmsg("could not get text from application file \"%s\"",
277                                                         appfile),
278                                          errdetail("DMC_GetText_V5() failed with errcode %d",
279                                                            ret)));
280
281                         /* Return NULL if textporter_error is set to other than ERROR */
282                         return_null = true;
283                 }
284                 else
285                 {
286                         /* Read text data from temporary file to memory */
287                         if (stat(txtfile, &statbuf))
288                                 ereport(ERROR,
289                                                 (errcode_for_file_access(),
290                                                  errmsg("could not stat file \"%s\": %m", txtfile)));
291                         result = (text *) palloc(statbuf.st_size + VARHDRSZ);
292
293                         fp = AllocateFile(txtfile, "r");
294                         if (fp == NULL)
295                                 ereport(ERROR,
296                                                 (errcode_for_file_access(),
297                                                  errmsg("could not open file \"%s\": %m", txtfile)));
298
299                         if (fread(VARDATA(result), 1, statbuf.st_size, fp) != statbuf.st_size ||
300                                 ferror(fp))
301                                 ereport(ERROR,
302                                                 (errcode_for_file_access(),
303                                                  errmsg("could not read file \"%s\": %m", txtfile)));
304
305                         FreeFile(fp);
306                 }
307
308                 REMOVE_TMPFILE(txtfile);
309                 pfree(appfile);
310         }
311         PG_CATCH();
312         {
313                 REMOVE_TMPFILE(txtfile);
314                 PG_RE_THROW();
315         }
316         PG_END_TRY();
317
318         if (return_null)
319                 PG_RETURN_NULL();
320
321         SET_VARSIZE(result, statbuf.st_size + VARHDRSZ);
322
323         PG_RETURN_TEXT_P(result);
324 }
325
326 /*
327  * Clean up remaining textporter temporary files
328  */
329 static void
330 CleanupTextPorterTmpFiles(void)
331 {
332         DIR                             *tpdir;
333         struct dirent   *tpde;
334         char                    path[MAXPGPATH];
335
336         tpdir = AllocateDir(TEXTPORTER_TMPDIR);
337         if (tpdir == NULL)
338                 ereport(ERROR,
339                                 (errcode_for_file_access(),
340                                  errmsg("could not open textporter temporary file directory \"%s\": %m",
341                                                 TEXTPORTER_TMPDIR)));
342
343         while ((tpde = ReadDir(tpdir, TEXTPORTER_TMPDIR)) != NULL)
344         {
345                 if (strlen(tpde->d_name) == 18 &&
346                         strncmp(tpde->d_name, "ludia_funcs_", 12) == 0)
347                 {
348                         snprintf(path, MAXPGPATH, TEXTPORTER_TMPDIR "/%s", tpde->d_name);
349                         REMOVE_TMPFILE(path);
350                 }
351         }
352
353         FreeDir(tpdir);
354 }
355
356 static bool
357 check_textporter_option(char **newval, void **extra, GucSource source)
358 {
359         unsigned long   val;
360         char                    *endptr;
361         unsigned int    *myextra;
362
363         errno = 0;
364         val = strtoul(*newval, &endptr, 0);
365
366         if (*endptr != '\0')
367                 return false;
368
369         if (errno == ERANGE || val != (unsigned long) ((unsigned int) val))
370         {
371                 GUC_check_errhint("Value exceeds unsigned integer range.");
372                 return false;
373         }
374
375         /* Set up the "extra" struct actually used by assign_textporter_option */
376         myextra = (unsigned int *) malloc(sizeof(unsigned int));
377         if (myextra == NULL)
378         {
379                 GUC_check_errcode(ERRCODE_OUT_OF_MEMORY);
380                 GUC_check_errmsg("out of memory");
381                 return false;
382         }
383         *myextra = (unsigned int) val;
384         *extra = (void *) myextra;
385
386         return true;
387 }
388
389 static void
390 assign_textporter_option(const char *newval, void *extra)
391 {
392         textporter_option = *((unsigned int *) extra);
393 }
394
395 #else   /* TEXTPORTER */
396
397 Datum
398 pgs2textporter1(PG_FUNCTION_ARGS)
399 {
400         PG_RETURN_NULL();
401 }
402
403 #endif  /* TEXTPORTER */
404
405 static sen_encoding
406 GetSennaEncoding(void)
407 {
408         static sen_encoding             encoding = sen_enc_default;
409
410         if (encoding == sen_enc_default)
411         {
412                 if (GetDatabaseEncoding() == PG_UTF8)
413                         encoding = sen_enc_utf8;
414                 else
415                         ereport(ERROR,
416                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
417                                          errmsg("does not support database encoding \"%s\"",
418                                                         GetDatabaseEncodingName())));
419         }
420         return encoding;
421 }
422
/*
 * Escape the backslash and double quote characters in a snippet keyword.
 *
 * On entry *s points to the keyword (it need not be NUL-terminated) and
 * *slen holds its length in bytes.
 *
 * Returns false, leaving *s and *slen untouched, if the keyword contains
 * no character that needs to be escaped.  Otherwise returns true; in that
 * case *s points to newly palloc'd space holding the escaped keyword and
 * *slen is set to the length of that string.  The caller needs to pfree
 * the palloc'd space.
 *
 * Escaping rules implemented below:
 *   - a backslash is always escaped;
 *   - a double quote at the very beginning or the very end of the keyword
 *     is copied as is;
 *   - any other double quote is escaped unless a Senna operator character
 *     ('+', '-' or ' ') is immediately to its left or right.
 */
static bool
EscapeSnippetKeyword(char **s, size_t *slen)
{
	const char	*start = *s;
	const char	*end = start + *slen;
	const char	*sp;
	char		*ep;
	char		*escaped;
	int			mblen;
	size_t		copylen;
	bool		in_doublequote = false;
	bool		in_sennaops = false;
	bool		need_escape = false;

	/*
	 * Skip the heading double quote character because it never needs to be
	 * interpreted as a character itself and be escaped.  Note that we must
	 * not skip the heading character if it's not a double quote, and that
	 * an empty keyword has no heading character to look at.
	 */
	sp = start;
	if (*slen > 0 && ISDOUBLEQUOTECHAR(sp))
		sp++;

	/*
	 * First pass: check whether the snippet keyword has a character which
	 * needs to be escaped, stopping at the first one found.
	 */
	while ((size_t) (sp - start) < *slen)
	{
		/*
		 * Never let a truncated trailing multibyte sequence carry us past
		 * the end of the input.
		 */
		mblen = pg_mblen(sp);
		if ((size_t) mblen > (size_t) (end - sp))
			mblen = (int) (end - sp);

		/*
		 * Backslash in the keyword always needs to be escaped.
		 */
		if (ISBACKSLASHCHAR(sp))
		{
			need_escape = true;
			break;
		}

		if (in_doublequote)
		{
			/* The previous character was a not-yet-classified double quote */
			if (ISSENNAOPSCHAR(sp))
			{
				in_sennaops = true;
				in_doublequote = false;
			}
			else
			{
				/*
				 * Double quote in the keyword needs to be escaped if
				 * there is no Senna search operator on either side of it.
				 */
				need_escape = true;
				break;
			}
		}
		else
		{
			if (ISDOUBLEQUOTECHAR(sp) && !in_sennaops)
				in_doublequote = true;
			if (!ISSENNAOPSCHAR(sp))
				in_sennaops = false;
		}

		sp += mblen;
	}

	/*
	 * Quick exit if the keyword has no character which needs to be
	 * escaped.
	 */
	if (!need_escape)
		return false;

	/*
	 * Allocate the buffer space to store the escaped snippet keyword string.
	 * The maximum size of the escaped string is double the input keyword
	 * size; that maximum is reached when every character in the input
	 * keyword needs to be escaped.
	 */
	ep = escaped = (char *) palloc(*slen * 2);

	/*
	 * Copy the characters which the first pass has already walked over and
	 * found not to need escaping.  If in_doublequote is true, the double
	 * quote in the previous position is left out because it might still
	 * need to be escaped; the second pass decides that.
	 */
	copylen = (size_t) (sp - start) - (in_doublequote ? 1 : 0);
	memcpy(ep, start, copylen);
	ep += copylen;

	/*
	 * Second pass: construct the rest of the escaped snippet keyword,
	 * resuming where the first pass stopped.
	 */
	while ((size_t) (sp - start) < *slen)
	{
		mblen = pg_mblen(sp);
		if ((size_t) mblen > (size_t) (end - sp))
			mblen = (int) (end - sp);

		if (in_doublequote)
		{
			/*
			 * dqchar is the previous character, that is, a double quote.
			 * We assume here that a double quote is a single-byte
			 * character.
			 */
			char		dqchar = *(sp - 1);

			if (ISSENNAOPSCHAR(sp))
			{
				/*
				 * Don't escape the double quote which is just before a
				 * Senna operator.
				 */
				*ep++ = dqchar;
				*ep++ = *sp;
				in_sennaops = true;
				in_doublequote = false;
			}
			else
			{
				/*
				 * Escape the double quote if no Senna operator is next
				 * to it.
				 */
				*ep++ = '\\';
				*ep++ = dqchar;

				if (ISDOUBLEQUOTECHAR(sp))
				{
					/* The current quote is judged on the next iteration */
					in_doublequote = true;
				}
				else
				{
					if (ISBACKSLASHCHAR(sp))
						*ep++ = '\\';
					memcpy(ep, sp, mblen);
					ep += mblen;
					in_doublequote = false;
				}
			}
		}
		else
		{
			if (ISDOUBLEQUOTECHAR(sp))
			{
				/*
				 * Don't escape the double quote which is just after a
				 * Senna operator.  Otherwise defer the decision until the
				 * following character is known.
				 */
				if (in_sennaops)
					*ep++ = *sp;
				else
					in_doublequote = true;
			}
			else
			{
				if (ISBACKSLASHCHAR(sp))
					*ep++ = '\\';

				/*
				 * We don't check ISSENNAOPSCHAR() here.  A Senna operator
				 * character is handled as a character itself instead of
				 * an operator if it doesn't follow a double quote.
				 */
				memcpy(ep, sp, mblen);
				ep += mblen;
			}

			if (!ISSENNAOPSCHAR(sp))
				in_sennaops = false;
		}

		sp += mblen;
	}

	/* Add the trailing double quote, still pending, into the buffer */
	if (in_doublequote)
		*ep++ = *(sp - 1);

	*s = escaped;
	*slen = ep - escaped;

#ifdef PGS2_DEBUG
	if (pgs2_enable_debug)
	{
		char	*tmp = pnstrdup(*s, *slen);

		elog(LOG, "escaped snippet keyword: %s", tmp);
		pfree(tmp);
	}
#endif

	return true;
}
623
624 static sen_query *
625 GetSennaQuery(char *str, size_t len)
626 {
627         static sen_query        *query_cache = NULL;
628         static char                     *key_cache = NULL;
629         static size_t           len_cache = 0;
630         static bool                     guc_cache = false;
631         sen_query       *query;
632         sen_encoding    encoding;
633         char            *key;
634         size_t          key_len;
635         int                     rest;
636         bool            needfree = false;
637
638         /*
639          * Return the cached Senna query if the same keyword has
640          * been used the last time.
641          */
642         if (key_cache != NULL &&
643                 len == len_cache &&
644                 strncmp(key_cache, str, len) == 0 &&
645                 escape_snippet_keyword == guc_cache)
646         {
647 #ifdef PGS2_DEBUG
648                 if (pgs2_enable_debug)
649                 {
650                         char    *tmp = pnstrdup(str, len);
651
652                         elog(LOG, "GetSennaQuery(): quick exit: %s", tmp);
653                         pfree(tmp);
654                 }
655 #endif
656                 return query_cache;
657         }
658
659         encoding = GetSennaEncoding();
660
661         key = malloc(len);
662         if (key == NULL)
663                 ereport(ERROR,
664                                 (errcode(ERRCODE_OUT_OF_MEMORY),
665                                  errmsg("out of memory")));
666
667         /*
668          * We always cache the unescaped keyword. Which enables us
669          * to check whether we can use the cached Senna query before
670          * escaping the keyword.
671          */
672         memcpy(key, str, len);
673         key_len = len;
674
675         /*
676          * If the keyword has been escaped, 'str' points to the
677          * newly-palloc'd space storing the escaped keyword. This
678          * space needs to be freed later.
679          */
680         if (escape_snippet_keyword)
681                 needfree = EscapeSnippetKeyword(&str, &len);
682
683         query = sen_query_open(str, len, sen_sel_or, SEN_MAX_N_EXPRS,
684                                                    encoding);
685         if (query == NULL)
686         {
687                 free(key);
688                 ereport(ERROR,
689                                 (errmsg("sen_query_open() failed")));
690         }
691
692         if ((rest = sen_query_rest(query, NULL)) != 0)
693                 ereport(WARNING,
694                                 (errmsg("too many expressions (%d)", rest)));
695
696         if (query_cache != NULL)
697         {
698                 sen_query_close(query_cache);
699                 free(key_cache);
700         }
701
702         key_cache = key;
703         len_cache = key_len;
704         query_cache = query;
705         guc_cache = escape_snippet_keyword;
706
707         if (needfree)
708                 pfree(str);
709
710         return query;
711 }
712
713 Datum
714 pgs2snippet1(PG_FUNCTION_ARGS)
715 {
716         int                     flags = PG_GETARG_INT32(0);
717         uint32          width = PG_GETARG_UINT32(1);
718         uint32          max_results = PG_GETARG_UINT32(2);
719         text       *opentags = PG_GETARG_TEXT_P(3);
720         text       *closetags = PG_GETARG_TEXT_P(4);
721         int                     mapping = PG_GETARG_INT32(5);
722         text       *keywords = PG_GETARG_TEXT_P(6);
723         text       *document = PG_GETARG_TEXT_P(7);
724         sen_query  *query;
725         sen_snip   *snip = NULL;
726         const char *opentags_str = VARDATA_ANY(opentags);
727         const char *closetags_str = VARDATA_ANY(closetags);
728         char       *keywords_str = VARDATA_ANY(keywords);
729         char       *document_str = VARDATA_ANY(document);
730         uint32          opentags_len = VARSIZE_ANY_EXHDR(opentags);
731         uint32          closetags_len = VARSIZE_ANY_EXHDR(closetags);
732         uint32          keywords_len = VARSIZE_ANY_EXHDR(keywords);
733         uint32          document_len = VARSIZE_ANY_EXHDR(document);
734         uint32          nresults = 0;
735         uint32          max_tagged_len = 0;
736         sen_rc          rc;
737         text       *result;
738         uint32          result_len = 0;
739         bool            return_null = false;
740
741         query = GetSennaQuery(keywords_str, keywords_len);
742
743         snip = sen_query_snip(query, flags, width, max_results, 1,
744                                                   &opentags_str, &opentags_len,
745                                                   &closetags_str, &closetags_len,
746                                                   mapping == 0 ? NULL : (sen_snip_mapping *)-1);
747         if (snip == NULL)
748                 ereport(ERROR,
749                                 (errmsg("sen_query_snip() failed")));
750
751         PG_TRY();
752         {
753                 rc = sen_snip_exec(snip, document_str, document_len,
754                                                    &nresults, &max_tagged_len);
755                 if (rc != sen_success)
756                         ereport(ERROR,
757                                         (errmsg("sen_snip_exec() failed: %d", rc)));
758
759                 result = (text *) palloc(max_tagged_len + VARHDRSZ);
760
761                 rc = sen_snip_get_result(snip, 0, VARDATA(result), &result_len);
762                 if (rc == sen_invalid_argument)
763                         return_null = true;
764                 else if (rc != sen_success)
765                         ereport(ERROR,
766                                         (errmsg("sen_snip_get_result() failed: %d", rc)));
767         }
768         PG_CATCH();
769         {
770                 sen_snip_close(snip);
771                 PG_RE_THROW();
772         }
773         PG_END_TRY();
774
775         sen_snip_close(snip);
776
777         if (return_null)
778                 PG_RETURN_NULL();
779
780         SET_VARSIZE(result, max_tagged_len + VARHDRSZ);
781
782         PG_RETURN_TEXT_P(result);
783 }
784
785 /*
786  * Make sure there is enough space for 'needed' more bytes.
787  *
788  * Sets **buf to the allocated space which can store the needed bytes if OK,
789  * NULL if failed to enlarge the space because 'needed' is larger than 'maxlen'.
790  */
791 static inline void
792 pgs2malloc(void **buf, long *buflen, long needed, long maxlen)
793 {
794 #ifdef PGS2_DEBUG
795         if (pgs2_enable_debug)
796                 elog(LOG, "pgs2malloc(): buflen %ld, needed %ld, maxlen %ld",
797                          *buflen, needed, maxlen);
798 #endif
799
800         if (*buf != NULL && *buflen >= needed && (*buflen <= maxlen || maxlen == 0))
801                 return;         /* got enough space already */
802
803         /*
804          * Release the already-allocated space since it's too small to
805          * store the needed bytes or larger than the upper limit.
806          */
807         if (*buf != NULL)
808         {
809                 free(*buf);
810                 *buf = NULL;
811                 *buflen = 0;
812         }
813
814         /*
815          * Don't allocate any space if the needed space is larger than
816          * the upper limit.
817          */
818         if (needed > maxlen && maxlen != 0)
819                 return;
820
821         /*
822          * Allocate the space for the needed bytes.
823          *
824          * We don't want to allocate just a little more space with each enlarge;
825          * for efficiency, double the buffer size each time it overflows.
826          * Actually, we might need to more than double it if 'needed' is big...
827          *
828          * We check whether '*buflen' overflows each cycle to avoid infinite loop.
829          */
830         *buflen = 1024L;
831         while (*buflen < needed && *buflen != 0)
832                 *buflen <<= 1;
833
834         /*
835          * Clamp to maxlen in case we went past it.  Note we are assuming
836          * here that maxlen <= LONG_MAX/2, else the above loop could
837          * overflow.  We will still have *buflen >= needed.
838          */
839         if (*buflen > maxlen && maxlen != 0)
840                 *buflen = maxlen;
841
842         /* Guard against out-of-range '*buflen' value */
843         if (*buflen == 0)
844                 ereport(ERROR,
845                                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
846                                  errmsg("out of memory"),
847                                  errdetail("Cannot enlarge buffer by %ld more bytes.",
848                                                    needed)));
849
850         *buf = (void *) malloc(*buflen);
851         if (*buf == NULL)
852                 ereport(ERROR,
853                                 (errcode(ERRCODE_OUT_OF_MEMORY),
854                                  errmsg("out of memory")));
855 }
856
857 Datum
858 pgs2norm(PG_FUNCTION_ARGS)
859 {
860         text            *str = PG_GETARG_TEXT_PP(0);
861         char            *s = VARDATA_ANY(str);
862         long            slen = VARSIZE_ANY_EXHDR(str);
863         text            *result = NULL;
864         long            buflen;
865         long            reslen;
866         long            maxlen;
867         long            needed;
868
869         /*
870          * norm_cache is the cache memory storing both input and normalized strings
871          * as the result of pgs2norm(). norm_cache_size is the size of norm_cache
872          * and its upper limit is specified by norm_cache_limit parameter. norm_result
873          * is the pointer to the normalized string with the verlena header (i.e.,
874          * text type) stored in the latter half of the cache. norm_reslen is the size
875          * of norm_result. norm_slen is the size of the input string which is stored
876          * in the first half of the cache.
877          */
878         static char             *norm_cache = NULL;
879         static long             norm_cache_size = 0;
880         static long             norm_slen = 0;
881         static char             *norm_result = NULL;
882         static long             norm_reslen = 0;
883
884         /*
885          * Return the cached normalization result if the same string of
886          * the given one has been normalized the last time.
887          */
888         if (norm_cache != NULL &&
889                 norm_slen == slen &&
890                 strncmp(norm_cache, s, slen) == 0)
891         {
892 #ifdef PGS2_DEBUG
893                 if (pgs2_enable_debug)
894                 {
895                         char    *tmp = text_to_cstring(str);
896
897                         elog(LOG, "pgs2norm(): quick exit: %s", tmp);
898                         pfree(tmp);
899                 }
900 #endif
901
902                 PG_RETURN_TEXT_P(pnstrdup(norm_result, norm_reslen));
903         }
904
905         /* Confirm that database encoding is UTF-8 */
906         GetSennaEncoding();
907
908         /*
909          * Allocate the result buffer to store the normalized string. Since the size of
910          * normalized string can be larger than that of input one, the result buffer needs
911          * extra space. Problem is that, before calling sen_str_normalize, we need to
912          * allocate the result buffer but cannot know how large extra space is required.
913          * So we use RESULT_EXTRA_SIZE as the estimated size of extra space here.
914          */
915 #define RESULT_EXTRA_SIZE       64
916         buflen = slen + RESULT_EXTRA_SIZE;
917
918 retry:
919         result = (text *) palloc(buflen + VARHDRSZ);
920
921 #if defined(FAST_SENNA)
922         reslen = fast_sen_str_normalize(s, slen, VARDATA(result), buflen);
923 #else
924         reslen = sen_str_normalize(s, slen, sen_enc_utf8,
925                                                            SEN_NORMALIZE_FLAGS,
926                                                            VARDATA(result), buflen);
927 #endif
928
929         if (reslen < 0)
930                 ereport(ERROR,
931                                 (errmsg("could not normalize the string")));
932
933         /*
934          * If the result buffer size is too short to store the normalized string,
935          * we enlarge the buffer and retry the string normalization.
936          */
937         if (buflen <= reslen)
938         {
939                 pfree(result);
940                 buflen = reslen + 1;
941                 goto retry;
942         }
943
944         SET_VARSIZE(result, reslen + VARHDRSZ);
945
946         /*
947          * Cache both input and normalized strings to accelerate the subsequent
948          * calls of pgs2norm() with the same input string. But we don't do that
949          * if the maximum allowed size of the cache is too small to store them.
950          */
951         needed = slen + reslen + VARHDRSZ;
952         maxlen = ((norm_cache_limit >= 0) ? norm_cache_limit : work_mem) * 1024L;
953
954         pgs2malloc((void **) &norm_cache, &norm_cache_size, needed, maxlen);
955         if (norm_cache != NULL)
956         {
957                 /* Store the input string into the first half of the cache */
958                 norm_slen = slen;
959                 memcpy(norm_cache, s, slen);
960
961                 /*
962                  * Store the normalized string with the varlena header (i.e., text type)
963                  * into the latter half of the cache.
964                  */
965                 norm_result = norm_cache + slen;
966                 norm_reslen = reslen + VARHDRSZ;
967                 memcpy(norm_result, result, norm_reslen);
968         }
969
970 #ifdef PGS2_DEBUG
971         if (pgs2_enable_debug)
972         {
973                 char    *tmp = text_to_cstring(str);
974
975                 elog(LOG, "pgs2norm(): complete (%s result cache): %s",
976                          (norm_cache == NULL) ? "unset" : "set", tmp);
977                 pfree(tmp);
978         }
979 #endif
980
981         PG_RETURN_TEXT_P(result);
982 }
983
984 /*
985  * Report the version and configure options of Senna which
986  * ludia_funcs depends on.
987  */
988 Datum
989 pgs2seninfo(PG_FUNCTION_ARGS)
990 {
991         char    *version[MAXPGPATH];
992         char    *coptions[MAXPGPATH];
993         Datum   values[2];
994         bool    isnull[2];
995         HeapTuple tuple;
996         TupleDesc tupdesc;
997
998         /*
999          * Get the version and configure options of Senna. Ignore the
1000          * return value of sen_info() because it always returns a success.
1001          */
1002         sen_info((char **)&version, (char **)&coptions, NULL, NULL, NULL, NULL);
1003
1004         /*
1005          * Construct a tuple descriptor for the result row. This must
1006          * match this function's ludia_funcs--x.x.sql entry.
1007          */
1008         tupdesc = CreateTemplateTupleDesc(2, false);
1009         TupleDescInitEntry(tupdesc, (AttrNumber) 1,
1010                                            "version", TEXTOID, -1, 0);
1011         TupleDescInitEntry(tupdesc, (AttrNumber) 2,
1012                                            "configure_options", TEXTOID, -1, 0);
1013         tupdesc = BlessTupleDesc(tupdesc);
1014
1015         /* version */
1016         values[0] = CStringGetTextDatum(*version);
1017         isnull[0] = false;
1018
1019         /* configure option */
1020         values[1] = CStringGetTextDatum(*coptions);
1021         isnull[1] = false;
1022
1023         tuple = heap_form_tuple(tupdesc, values, isnull);
1024         PG_RETURN_DATUM(HeapTupleGetDatum(tuple));
1025 }