OSDN Git Service

Support terse logging in enable_debug.
[ludiafuncs/ludia_funcs.git] / ludia_funcs.c
1 /*-------------------------------------------------------------------------
2  *
3  * Copyright (c) 2016-2020, ludia_funcs Development Group
4  * Copyright (c) 2006-2015, NTT DATA Corporation
5  * All rights reserved.
6  *
7  * Changelog:
8  *   2013/01/09
9  *   Update Ludia functions so that they are available with PostgreSQL9.1.
10  *   Author: NTT DATA Corporation
11  *
12  *-------------------------------------------------------------------------
13  */
14 #include "postgres.h"
15
16 #include <limits.h>
17 #include <sys/types.h>
18 #include <sys/stat.h>
19 #include <unistd.h>
20
21 #include "catalog/pg_type.h"
22 #include "fmgr.h"
23 #include "funcapi.h"
24 #include "ludia_funcs.h"
25 #include "mb/pg_wchar.h"
26 #include "senna.h"
27 #include "storage/fd.h"
28 #include "utils/builtins.h"
29 #include "utils/guc.h"
30 #include "miscadmin.h"
31
32 #if PG_VERSION_NUM >= 90300
33 #include "access/htup_details.h"
34 #endif
35
36 PG_MODULE_MAGIC;
37
38 /* Last update date of ludia_funcs */
39 #define PGS2_LAST_UPDATE        "2019.10.04"
40
41 /* GUC variables */
42 #ifdef PGS2_DEBUG
43 typedef enum pgs2_enable_debug_type
44 {
45     PGS2_ENABLE_DEBUG_OFF,              /* logs no debug log */
46     PGS2_ENABLE_DEBUG_TERSE,    /* logs tersely, e.g., just names of
47                                                                    functions */
48     PGS2_ENABLE_DEBUG_ON                /* logs detailed information */
49 } pgs2_enable_debug_type;
50
51 /* We accept all the likely variants of "on" and "off" */
52 static const struct config_enum_entry pgs2_enable_debug_options[] = {
53         {"off", PGS2_ENABLE_DEBUG_OFF, false},
54         {"terse", PGS2_ENABLE_DEBUG_TERSE, false},
55         {"on", PGS2_ENABLE_DEBUG_ON, false},
56         {"true", PGS2_ENABLE_DEBUG_ON, true},
57         {"false", PGS2_ENABLE_DEBUG_OFF, true},
58         {"yes", PGS2_ENABLE_DEBUG_ON, true},
59         {"no", PGS2_ENABLE_DEBUG_OFF, true},
60         {"1", PGS2_ENABLE_DEBUG_ON, true},
61         {"0", PGS2_ENABLE_DEBUG_OFF, true},
62         {NULL, 0, false}
63 };
64
65 static int      pgs2_enable_debug = PGS2_ENABLE_DEBUG_OFF;
66 #endif  /* PGS2_DEBUG */
67
68 static char     *pgs2_last_update = NULL;
69 static int      norm_cache_limit = -1;
70 static bool     escape_snippet_keyword = false;
71
72 #define SEN_NORMALIZE_FLAGS 0
73 #define SEN_MAX_N_EXPRS         32
74
75 /* upper limit for GUC variables measured in kilobytes of memory */
76 /* note that various places assume the byte size fits in a "long" variable */
77 #if SIZEOF_SIZE_T > 4 && SIZEOF_LONG > 4
78 #define MAX_KILOBYTES   INT_MAX
79 #else
80 #define MAX_KILOBYTES   (INT_MAX / 1024)
81 #endif
82
83 #define ISBACKSLASHCHAR(x) (*(x) == '\\')
84 #define ISDOUBLEQUOTECHAR(x) (*(x) == '"')
85 #define ISSENNAOPSCHAR(x) (*(x) == '+' || *(x) == '-' || *(x) == ' ')
86
87 PG_FUNCTION_INFO_V1(pgs2snippet1);
88 PG_FUNCTION_INFO_V1(pgs2norm);
89 PG_FUNCTION_INFO_V1(pgs2textporter1);
90 PG_FUNCTION_INFO_V1(pgs2seninfo);
91
92 /*
93  * The function prototypes are created as a part of PG_FUNCTION_INFO_V1
94  * macro since 9.4, and hence the declaration of the function prototypes
95  * here is necessary only for 9.3 or before.
96  */
97 #if PG_VERSION_NUM < 90400
98 Datum   pgs2snippet1(PG_FUNCTION_ARGS);
99 Datum   pgs2norm(PG_FUNCTION_ARGS);
100 Datum   pgs2textporter1(PG_FUNCTION_ARGS);
101 Datum   pgs2seninfo(PG_FUNCTION_ARGS);
102 #endif
103
104 static sen_encoding     GetSennaEncoding(void);
105 static sen_query        *GetSennaQuery(char *str, size_t len);
106 static bool                     EscapeSnippetKeyword(char **s, size_t *slen);
107
108 #ifdef TEXTPORTER
109 #define TEXTPORTER_TMPDIR                       "/tmp"
110 #define TEXTPORTER_MKSTEMP_UMASK                0177
111 #define TEXTPORTER_GROUPNAME            "UTF-8"
112 #define TEXTPORTER_DEFLANGNAME          "Japanese"
113 #define TEXTPORTER_BBIGENDIAN           1
114 #define TEXTPORTER_OPTION                       0x00000020      /* DMC_GETTEXT_OPT_LF */
115 #define TEXTPORTER_OPTION_STRING        "32"
116 #define TEXTPORTER_OPTION1                      0x00010000      /* DMC_GETTEXT_OPT1_TXCONV */
117 #define TEXTPORTER_SIZE                         0
118 #define TEXTPORTER_CSV_C                        0
119
120
121 /* GUC variables for pgs2textporter1 */
122 static int      textporter_error = ERROR;
123 static unsigned int     textporter_option = TEXTPORTER_OPTION;
124
125 /*
126  * This variable is a dummy that doesn't do anything, except in some
127  * cases provides the value for SHOW to display.  The real state is
128  * elsewhere and is kept in sync by assign_hooks.
129  */
130 static char     *textporter_option_string;
131
132 static const struct config_enum_entry textporter_error_options[] = {
133         {"debug1", DEBUG1, false},
134         {"log", LOG, false},
135         {"info", INFO, false},
136         {"notice", NOTICE, false},
137         {"warning", WARNING, false},
138         {"error", ERROR, false},
139         {NULL, 0, false}
140 };
141
142 static void CleanupTextPorterTmpFiles(void);
143
144 static bool check_textporter_option(char **newval, void **extra, GucSource source);
145 static void assign_textporter_option(const char *newval, void *extra);
146 #endif  /* TEXTPORTER */
147
148 void    _PG_init(void);
149 void    _PG_fini(void);
150
151 void
152 _PG_init(void)
153 {
154         sen_rc          rc;
155
156 #ifdef PGS2_DEBUG
157         /* Define custom GUC variable for debugging */
158         DefineCustomEnumVariable("ludia_funcs.enable_debug",
159                                                          "Emit ludia_funcs debugging output.",
160                                                          NULL,
161                                                          &pgs2_enable_debug,
162                                                          PGS2_ENABLE_DEBUG_OFF,
163                                                          pgs2_enable_debug_options,
164                                                          PGC_USERSET,
165                                                          0,
166                                                          NULL,
167                                                          NULL,
168                                                          NULL);
169 #endif
170
171         /* Can't be set in postgresql.conf */
172         DefineCustomStringVariable("ludia_funcs.last_update",
173                                                            "Shows the last update date of ludia_funcs.",
174                                                            NULL,
175                                                            &pgs2_last_update,
176                                                            PGS2_LAST_UPDATE,
177                                                            PGC_INTERNAL,
178                                                            GUC_REPORT | GUC_NOT_IN_SAMPLE | GUC_DISALLOW_IN_FILE,
179                                                            NULL,
180                                                            NULL,
181                                                            NULL);
182
183 #ifdef TEXTPORTER
184         /* Define custom GUC variables */
185         DefineCustomEnumVariable("ludia_funcs.textporter_error",
186                                                          "Sets the message levels that are emitted "
187                                                          "when textporter fails.",
188                                                          NULL,
189                                                          &textporter_error,
190                                                          ERROR,
191                                                          textporter_error_options,
192                                                          PGC_SUSET,
193                                                          0,
194                                                          NULL,
195                                                          NULL,
196                                                          NULL);
197
198         DefineCustomStringVariable("ludia_funcs.textporter_option",
199                                                            "Sets the option used to get text data "
200                                                            "from TextPorter",
201                                                            NULL,
202                                                            &textporter_option_string,
203                                                            TEXTPORTER_OPTION_STRING,
204                                                            PGC_SUSET,
205                                                            0,
206                                                            check_textporter_option,
207                                                            assign_textporter_option,
208                                                            NULL);
209
210         /* Clean up remaining textporter temporary files */
211         CleanupTextPorterTmpFiles();
212 #endif  /* TEXTPORTER */
213
214         /*
215          * A value of 0 means no limit on the cache size. A value of -1 means
216          * that work_mem is used as the upper size limit of the cache.
217          */
218         DefineCustomIntVariable("ludia_funcs.norm_cache_limit",
219                                                         "Sets the maximum memory to be used for caching "
220                                                         "the result of pgs2norm()",
221                                                         NULL,
222                                                         &norm_cache_limit,
223                                                         -1,
224                                                         -1,
225                                                         MAX_KILOBYTES,
226                                                         PGC_USERSET,
227                                                         GUC_UNIT_KB,
228                                                         NULL,
229                                                         NULL,
230                                                         NULL);
231
232         DefineCustomBoolVariable("ludia_funcs.escape_snippet_keyword",
233                                                          "Escapes snippet keyword string.",
234                                                          NULL,
235                                                          &escape_snippet_keyword,
236                                                          false,
237                                                          PGC_USERSET,
238                                                          0,
239                                                          NULL,
240                                                          NULL,
241                                                          NULL);
242
243         EmitWarningsOnPlaceholders("ludia_funcs");
244
245         /* Initialize Senna */
246         rc = sen_init();
247         if (rc != sen_success)
248                 ereport(ERROR,
249                                 (errmsg("sen_init() failed: %d", rc)));
250 }
251
/*
 * Module unload callback. Intentionally empty: nothing is released here.
 */
void
_PG_fini(void)
{
	/* nothing to do */
}
256
257 #ifdef TEXTPORTER
/*
 * Unlink the temporary file at "path". A failure is only reported as a
 * WARNING (with the reason from errno), so the caller keeps running.
 * Note that "path" is evaluated more than once.
 */
#define REMOVE_TMPFILE(path) \
	do { \
		if (unlink(path) != 0) \
			ereport(WARNING, \
					(errcode_for_file_access(), \
					 errmsg("could not remove temporary file \"%s\": %m", path))); \
	} while(0)
265
266 Datum
267 pgs2textporter1(PG_FUNCTION_ARGS)
268 {
269         char    *appfile = text_to_cstring(PG_GETARG_TEXT_P(0));
270         char    txtfile[] = TEXTPORTER_TMPDIR "/ludia_funcs_XXXXXX";
271         int             tmpfd;
272         int             ret;
273         FILE    *fp;
274         text    *result = NULL;
275         struct stat     statbuf;
276         bool    return_null = false;
277         mode_t  oumask;
278
279         /* Confirm that database encoding is UTF-8 */
280         GetSennaEncoding();
281
282         PG_TRY();
283         {
284                 /*
285                  * Generate a unique temporary filename where text data gotten
286                  * from application file by TextPorter is stored temporarily.
287                  * Set the permission of a temporary file to 0600 to ensure that
288                  * only the owner of PostgreSQL server can read and write the file.
289                  */
290                 oumask = umask(TEXTPORTER_MKSTEMP_UMASK);
291                 tmpfd = mkstemp(txtfile);
292                 umask(oumask);
293
294                 if (tmpfd < 0)
295                         ereport(ERROR,
296                                         (errcode_for_file_access(),
297                                          errmsg("could not generate a unique temporary filename: %m")));
298                 if (close(tmpfd) != 0)
299                         ereport(ERROR,
300                                         (errcode_for_file_access(),
301                                          errmsg("could not close temporary file \"%s\": %m", txtfile)));
302
303                 /*
304                  * Run TextPorter to read text data from application file (appfile)
305                  * to temporary file (txtfile).
306                  */
307                 ret = ExecTextPorter((unsigned char *)appfile,
308                                                          (unsigned char *)txtfile,
309                                                          (unsigned char *)TEXTPORTER_GROUPNAME,
310                                                          (unsigned char *)TEXTPORTER_DEFLANGNAME,
311                                                          TEXTPORTER_BBIGENDIAN, textporter_option,
312                                                          TEXTPORTER_OPTION1, TEXTPORTER_SIZE,
313                                                          TEXTPORTER_CSV_C);
314                 if (ret != 0)
315                 {
316                         ereport(textporter_error,
317                                         (errmsg("could not get text from application file \"%s\"",
318                                                         appfile),
319                                          errdetail("DMC_GetText_V5() failed with errcode %d",
320                                                            ret)));
321
322                         /* Return NULL if textporter_error is set to other than ERROR */
323                         return_null = true;
324                 }
325                 else
326                 {
327                         /* Read text data from temporary file to memory */
328                         if (stat(txtfile, &statbuf))
329                                 ereport(ERROR,
330                                                 (errcode_for_file_access(),
331                                                  errmsg("could not stat file \"%s\": %m", txtfile)));
332                         result = (text *) palloc(statbuf.st_size + VARHDRSZ);
333
334                         fp = AllocateFile(txtfile, "r");
335                         if (fp == NULL)
336                                 ereport(ERROR,
337                                                 (errcode_for_file_access(),
338                                                  errmsg("could not open file \"%s\": %m", txtfile)));
339
340                         if (fread(VARDATA(result), 1, statbuf.st_size, fp) != statbuf.st_size ||
341                                 ferror(fp))
342                                 ereport(ERROR,
343                                                 (errcode_for_file_access(),
344                                                  errmsg("could not read file \"%s\": %m", txtfile)));
345
346                         FreeFile(fp);
347                 }
348
349                 REMOVE_TMPFILE(txtfile);
350                 pfree(appfile);
351         }
352         PG_CATCH();
353         {
354                 REMOVE_TMPFILE(txtfile);
355                 PG_RE_THROW();
356         }
357         PG_END_TRY();
358
359         if (return_null)
360                 PG_RETURN_NULL();
361
362         SET_VARSIZE(result, statbuf.st_size + VARHDRSZ);
363
364         PG_RETURN_TEXT_P(result);
365 }
366
367 /*
368  * Clean up remaining textporter temporary files
369  */
370 static void
371 CleanupTextPorterTmpFiles(void)
372 {
373         DIR                             *tpdir;
374         struct dirent   *tpde;
375         char                    path[MAXPGPATH];
376
377         tpdir = AllocateDir(TEXTPORTER_TMPDIR);
378         if (tpdir == NULL)
379                 ereport(ERROR,
380                                 (errcode_for_file_access(),
381                                  errmsg("could not open textporter temporary file directory \"%s\": %m",
382                                                 TEXTPORTER_TMPDIR)));
383
384         while ((tpde = ReadDir(tpdir, TEXTPORTER_TMPDIR)) != NULL)
385         {
386                 if (strlen(tpde->d_name) == 18 &&
387                         strncmp(tpde->d_name, "ludia_funcs_", 12) == 0)
388                 {
389                         snprintf(path, MAXPGPATH, TEXTPORTER_TMPDIR "/%s", tpde->d_name);
390                         REMOVE_TMPFILE(path);
391                 }
392         }
393
394         FreeDir(tpdir);
395 }
396
397 static bool
398 check_textporter_option(char **newval, void **extra, GucSource source)
399 {
400         unsigned long   val;
401         char                    *endptr;
402         unsigned int    *myextra;
403
404         errno = 0;
405         val = strtoul(*newval, &endptr, 0);
406
407         if (*endptr != '\0')
408                 return false;
409
410         if (errno == ERANGE || val != (unsigned long) ((unsigned int) val))
411         {
412                 GUC_check_errhint("Value exceeds unsigned integer range.");
413                 return false;
414         }
415
416         /* Set up the "extra" struct actually used by assign_textporter_option */
417         myextra = (unsigned int *) malloc(sizeof(unsigned int));
418         if (myextra == NULL)
419         {
420                 GUC_check_errcode(ERRCODE_OUT_OF_MEMORY);
421                 GUC_check_errmsg("out of memory");
422                 return false;
423         }
424         *myextra = (unsigned int) val;
425         *extra = (void *) myextra;
426
427         return true;
428 }
429
430 static void
431 assign_textporter_option(const char *newval, void *extra)
432 {
433         textporter_option = *((unsigned int *) extra);
434 }
435
436 #else   /* TEXTPORTER */
437
438 Datum
439 pgs2textporter1(PG_FUNCTION_ARGS)
440 {
441         PG_RETURN_NULL();
442 }
443
444 #endif  /* TEXTPORTER */
445
446 static sen_encoding
447 GetSennaEncoding(void)
448 {
449         static sen_encoding             encoding = sen_enc_default;
450
451         if (encoding == sen_enc_default)
452         {
453                 if (GetDatabaseEncoding() == PG_UTF8)
454                         encoding = sen_enc_utf8;
455                 else
456                         ereport(ERROR,
457                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
458                                          errmsg("does not support database encoding \"%s\"",
459                                                         GetDatabaseEncodingName())));
460         }
461         return encoding;
462 }
463
/*
 * Escape the backslash and double quote characters in the given string.
 *
 * On input, *s points to the keyword string (not necessarily
 * NUL-terminated) and *slen is its length in bytes.
 *
 * Return false if the given string has no character which needs to be
 * escaped; *s and *slen are left untouched in that case. Otherwise, return
 * true. In this case, *s points to the palloc'd space storing the escaped
 * keyword string and *slen is set to the size of that string. The caller
 * needs to free the palloc'd space.
 *
 * Escaping rules:
 * - a backslash is always escaped;
 * - a double quote is escaped unless it is the very first character, the
 *   very last character, or directly next to a Senna operator character
 *   ('+', '-' or ' ').
 */
static bool
EscapeSnippetKeyword(char **s, size_t *slen)
{
	const char *sp;
	const char *end = *s + *slen;	/* one past the last input byte */
	char	   *ep;
	char	   *escaped;
	int			chlen;
	size_t		copylen;
	bool		in_doublequote = false;
	bool		in_sennaops = false;
	bool		need_escape = false;

	/*
	 * Skip the leading double quote character because it never needs to be
	 * interpreted as a character itself and be escaped. Note that we must
	 * not skip the leading character if it's not a double quote, and that
	 * we must not even look at it if the keyword is empty.
	 */
	sp = *s;
	if (*slen > 0 && ISDOUBLEQUOTECHAR(sp))
		sp++;

	/*
	 * Check whether the snippet keyword string has a character which
	 * needs to be escaped.
	 */
	while (sp < end)
	{
		chlen = pg_mblen(sp);

		/*
		 * Backslash in the keyword always needs to be escaped.
		 */
		if (ISBACKSLASHCHAR(sp))
		{
			need_escape = true;
			break;
		}

		if (in_doublequote)
		{
			if (ISSENNAOPSCHAR(sp))
			{
				in_sennaops = true;
				in_doublequote = false;
			}
			else
			{
				/*
				 * Double quote in the keyword needs to be escaped if
				 * any Senna search operators are to neither its right
				 * nor left.
				 */
				need_escape = true;
				break;
			}
		}
		else
		{
			if (ISDOUBLEQUOTECHAR(sp) && !in_sennaops)
				in_doublequote = true;
			if (!ISSENNAOPSCHAR(sp))
				in_sennaops = false;
		}

		sp += chlen;
	}

	/*
	 * Quick exit if the keyword has no character which needs to be
	 * escaped.
	 */
	if (!need_escape)
		return false;

	/*
	 * Allocate the buffer space to store the escaped snippet keyword string.
	 * The maximum size of escaped string is double the input keyword size.
	 * The size reaches the maximum when every character in the input keyword
	 * needs to be escaped.
	 */
	ep = escaped = (char *) palloc(*slen * 2);

	/*
	 * Copy the characters which have been passed through in the above loop
	 * and don't need to be escaped, into the buffer. If in_doublequote is
	 * true, we don't copy the double quote in the previous position into the
	 * buffer because it might still need to be escaped.
	 */
	copylen = (size_t) (sp - *s) - (in_doublequote ? 1 : 0);
	memcpy(ep, *s, copylen);
	ep += copylen;

	/*
	 * Construct the escaped snippet keyword string.
	 */
	while (sp < end)
	{
		chlen = pg_mblen(sp);

		if (in_doublequote)
		{
			/*
			 * dqchar indicates the previous character, that is a double
			 * quote. We assume here that a double quote is single-byte
			 * character.
			 */
			char		dqchar = *(sp - 1);

			if (ISSENNAOPSCHAR(sp))
			{
				/*
				 * Don't escape the double quote which is just before Senna
				 * operator.
				 */
				*ep++ = dqchar;
				*ep++ = *sp;
				in_sennaops = true;
				in_doublequote = false;
			}
			else
			{
				/*
				 * Escape the double quote if no Senna operator is next to it.
				 */
				*ep++ = '\\';
				*ep++ = dqchar;

				if (ISDOUBLEQUOTECHAR(sp))
					in_doublequote = true;
				else
				{
					if (ISBACKSLASHCHAR(sp))
						*ep++ = '\\';
					memcpy(ep, sp, chlen);
					ep += chlen;
					in_doublequote = false;
				}
			}
		}
		else
		{
			if (ISDOUBLEQUOTECHAR(sp))
			{
				/*
				 * Don't escape the double quote which is just after Senna
				 * operator. Otherwise defer the decision until the next
				 * character is known.
				 */
				if (in_sennaops)
					*ep++ = *sp;
				else
					in_doublequote = true;
			}
			else
			{
				if (ISBACKSLASHCHAR(sp))
					*ep++ = '\\';

				/*
				 * We don't check ISSENNAOPSCHAR() here. We handle Senna
				 * operator character as a character itself instead of
				 * an operator if it doesn't follow a double quote.
				 */
				memcpy(ep, sp, chlen);
				ep += chlen;
			}

			if (!ISSENNAOPSCHAR(sp))
				in_sennaops = false;
		}

		sp += chlen;
	}

	/* Add the trailing double quote into the buffer, unescaped */
	if (in_doublequote)
		*ep++ = *(sp - 1);

	*s = escaped;
	*slen = ep - *s;

#ifdef PGS2_DEBUG
	if (pgs2_enable_debug == PGS2_ENABLE_DEBUG_ON)
	{
		/* The escaped string is not NUL-terminated, so log a copy */
		char	   *tmp = pnstrdup(*s, *slen);

		elog(LOG, "escaped snippet keyword: %s", tmp);
		pfree(tmp);
	}
	else if (pgs2_enable_debug == PGS2_ENABLE_DEBUG_TERSE)
		elog(LOG, "escaped snippet keyword");
#endif

	return true;
}
666
667 static sen_query *
668 GetSennaQuery(char *str, size_t len)
669 {
670         static sen_query        *query_cache = NULL;
671         static char                     *key_cache = NULL;
672         static size_t           len_cache = 0;
673         static bool                     guc_cache = false;
674         sen_query       *query;
675         sen_encoding    encoding;
676         char            *key;
677         size_t          key_len;
678         int                     rest;
679         bool            needfree = false;
680
681         /*
682          * Return the cached Senna query if the same keyword has
683          * been used the last time.
684          */
685         if (key_cache != NULL &&
686                 len == len_cache &&
687                 strncmp(key_cache, str, len) == 0 &&
688                 escape_snippet_keyword == guc_cache)
689         {
690 #ifdef PGS2_DEBUG
691                 if (pgs2_enable_debug == PGS2_ENABLE_DEBUG_ON)
692                 {
693                         char    *tmp = pnstrdup(str, len);
694
695                         elog(LOG, "GetSennaQuery(): quick exit: %s", tmp);
696                         pfree(tmp);
697                 }
698                 else if (pgs2_enable_debug == PGS2_ENABLE_DEBUG_TERSE)
699                                 elog(LOG, "GetSennaQuery(): quick exit");
700 #endif
701                 return query_cache;
702         }
703
704         encoding = GetSennaEncoding();
705
706         key = malloc(len);
707         if (key == NULL)
708                 ereport(ERROR,
709                                 (errcode(ERRCODE_OUT_OF_MEMORY),
710                                  errmsg("out of memory")));
711
712         /*
713          * We always cache the unescaped keyword. Which enables us
714          * to check whether we can use the cached Senna query before
715          * escaping the keyword.
716          */
717         memcpy(key, str, len);
718         key_len = len;
719
720         /*
721          * If the keyword has been escaped, 'str' points to the
722          * newly-palloc'd space storing the escaped keyword. This
723          * space needs to be freed later.
724          */
725         if (escape_snippet_keyword)
726                 needfree = EscapeSnippetKeyword(&str, &len);
727
728         query = sen_query_open(str, len, sen_sel_or, SEN_MAX_N_EXPRS,
729                                                    encoding);
730         if (query == NULL)
731         {
732                 free(key);
733                 ereport(ERROR,
734                                 (errmsg("sen_query_open() failed")));
735         }
736
737         if ((rest = sen_query_rest(query, NULL)) != 0)
738                 ereport(WARNING,
739                                 (errmsg("too many expressions (%d)", rest)));
740
741         if (query_cache != NULL)
742         {
743                 sen_query_close(query_cache);
744                 free(key_cache);
745         }
746
747         key_cache = key;
748         len_cache = key_len;
749         query_cache = query;
750         guc_cache = escape_snippet_keyword;
751
752         if (needfree)
753                 pfree(str);
754
755         return query;
756 }
757
758 Datum
759 pgs2snippet1(PG_FUNCTION_ARGS)
760 {
761         int                     flags = PG_GETARG_INT32(0);
762         uint32          width = PG_GETARG_UINT32(1);
763         uint32          max_results = PG_GETARG_UINT32(2);
764         text       *opentags = PG_GETARG_TEXT_P(3);
765         text       *closetags = PG_GETARG_TEXT_P(4);
766         int                     mapping = PG_GETARG_INT32(5);
767         text       *keywords = PG_GETARG_TEXT_P(6);
768         text       *document = PG_GETARG_TEXT_P(7);
769         sen_query  *query;
770         sen_snip   *snip = NULL;
771         const char *opentags_str = VARDATA_ANY(opentags);
772         const char *closetags_str = VARDATA_ANY(closetags);
773         char       *keywords_str = VARDATA_ANY(keywords);
774         char       *document_str = VARDATA_ANY(document);
775         uint32          opentags_len = VARSIZE_ANY_EXHDR(opentags);
776         uint32          closetags_len = VARSIZE_ANY_EXHDR(closetags);
777         uint32          keywords_len = VARSIZE_ANY_EXHDR(keywords);
778         uint32          document_len = VARSIZE_ANY_EXHDR(document);
779         uint32          nresults = 0;
780         uint32          max_tagged_len = 0;
781         sen_rc          rc;
782         text       *result;
783         uint32          result_len = 0;
784         bool            return_null = false;
785
786         query = GetSennaQuery(keywords_str, keywords_len);
787
788         snip = sen_query_snip(query, flags, width, max_results, 1,
789                                                   &opentags_str, &opentags_len,
790                                                   &closetags_str, &closetags_len,
791                                                   mapping == 0 ? NULL : (sen_snip_mapping *)-1);
792         if (snip == NULL)
793                 ereport(ERROR,
794                                 (errmsg("sen_query_snip() failed")));
795
796         PG_TRY();
797         {
798                 rc = sen_snip_exec(snip, document_str, document_len,
799                                                    &nresults, &max_tagged_len);
800                 if (rc != sen_success)
801                         ereport(ERROR,
802                                         (errmsg("sen_snip_exec() failed: %d", rc)));
803
804                 result = (text *) palloc(max_tagged_len + VARHDRSZ);
805
806                 rc = sen_snip_get_result(snip, 0, VARDATA(result), &result_len);
807                 if (rc == sen_invalid_argument)
808                         return_null = true;
809                 else if (rc != sen_success)
810                         ereport(ERROR,
811                                         (errmsg("sen_snip_get_result() failed: %d", rc)));
812         }
813         PG_CATCH();
814         {
815                 sen_snip_close(snip);
816                 PG_RE_THROW();
817         }
818         PG_END_TRY();
819
820         sen_snip_close(snip);
821
822         if (return_null)
823                 PG_RETURN_NULL();
824
825         SET_VARSIZE(result, max_tagged_len + VARHDRSZ);
826
827         PG_RETURN_TEXT_P(result);
828 }
829
830 /*
831  * Make sure there is enough space for 'needed' more bytes.
832  *
833  * Sets **buf to the allocated space which can store the needed bytes if OK,
834  * NULL if failed to enlarge the space because 'needed' is larger than 'maxlen'.
835  */
836 static inline void
837 pgs2malloc(void **buf, long *buflen, long needed, long maxlen)
838 {
839 #ifdef PGS2_DEBUG
840         if (pgs2_enable_debug == PGS2_ENABLE_DEBUG_ON)
841                 elog(LOG, "pgs2malloc(): buflen %ld, needed %ld, maxlen %ld",
842                          *buflen, needed, maxlen);
843         else if (pgs2_enable_debug == PGS2_ENABLE_DEBUG_TERSE)
844                 elog(LOG, "pgs2malloc()");
845 #endif
846
847         if (*buf != NULL && *buflen >= needed && (*buflen <= maxlen || maxlen == 0))
848                 return;         /* got enough space already */
849
850         /*
851          * Release the already-allocated space since it's too small to
852          * store the needed bytes or larger than the upper limit.
853          */
854         if (*buf != NULL)
855         {
856                 free(*buf);
857                 *buf = NULL;
858                 *buflen = 0;
859         }
860
861         /*
862          * Don't allocate any space if the needed space is larger than
863          * the upper limit.
864          */
865         if (needed > maxlen && maxlen != 0)
866                 return;
867
868         /*
869          * Allocate the space for the needed bytes.
870          *
871          * We don't want to allocate just a little more space with each enlarge;
872          * for efficiency, double the buffer size each time it overflows.
873          * Actually, we might need to more than double it if 'needed' is big...
874          *
875          * We check whether '*buflen' overflows each cycle to avoid infinite loop.
876          */
877         *buflen = 1024L;
878         while (*buflen < needed && *buflen != 0)
879                 *buflen <<= 1;
880
881         /*
882          * Clamp to maxlen in case we went past it.  Note we are assuming
883          * here that maxlen <= LONG_MAX/2, else the above loop could
884          * overflow.  We will still have *buflen >= needed.
885          */
886         if (*buflen > maxlen && maxlen != 0)
887                 *buflen = maxlen;
888
889         /* Guard against out-of-range '*buflen' value */
890         if (*buflen == 0)
891                 ereport(ERROR,
892                                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
893                                  errmsg("out of memory"),
894                                  errdetail("Cannot enlarge buffer by %ld more bytes.",
895                                                    needed)));
896
897         *buf = (void *) malloc(*buflen);
898         if (*buf == NULL)
899                 ereport(ERROR,
900                                 (errcode(ERRCODE_OUT_OF_MEMORY),
901                                  errmsg("out of memory")));
902 }
903
904 Datum
905 pgs2norm(PG_FUNCTION_ARGS)
906 {
907         text            *str = PG_GETARG_TEXT_PP(0);
908         char            *s = VARDATA_ANY(str);
909         long            slen = VARSIZE_ANY_EXHDR(str);
910         text            *result = NULL;
911         long            buflen;
912         long            reslen;
913         long            maxlen;
914         long            needed;
915
916         /*
917          * norm_cache is the cache memory storing both input and normalized strings
918          * as the result of pgs2norm(). norm_cache_size is the size of norm_cache
919          * and its upper limit is specified by norm_cache_limit parameter. norm_result
920          * is the pointer to the normalized string with the verlena header (i.e.,
921          * text type) stored in the latter half of the cache. norm_reslen is the size
922          * of norm_result. norm_slen is the size of the input string which is stored
923          * in the first half of the cache.
924          */
925         static char             *norm_cache = NULL;
926         static long             norm_cache_size = 0;
927         static long             norm_slen = 0;
928         static char             *norm_result = NULL;
929         static long             norm_reslen = 0;
930
931         /*
932          * Return the cached normalization result if the same string of
933          * the given one has been normalized the last time.
934          */
935         if (norm_cache != NULL &&
936                 norm_slen == slen &&
937                 strncmp(norm_cache, s, slen) == 0)
938         {
939 #ifdef PGS2_DEBUG
940                 if (pgs2_enable_debug == PGS2_ENABLE_DEBUG_ON)
941                 {
942                         char    *tmp = text_to_cstring(str);
943
944                         elog(LOG, "pgs2norm(): quick exit: %s", tmp);
945                         pfree(tmp);
946                 }
947                 else if (pgs2_enable_debug == PGS2_ENABLE_DEBUG_TERSE)
948                                 elog(LOG, "pgs2norm(): quick exit");
949 #endif
950
951                 result = (text *) palloc(norm_reslen);
952                 memcpy(result, norm_result, norm_reslen);
953                 PG_RETURN_TEXT_P(result);
954         }
955
956         /* Confirm that database encoding is UTF-8 */
957         GetSennaEncoding();
958
959         /*
960          * Allocate the result buffer to store the normalized string. Since the size of
961          * normalized string can be larger than that of input one, the result buffer needs
962          * extra space. Problem is that, before calling sen_str_normalize, we need to
963          * allocate the result buffer but cannot know how large extra space is required.
964          * So we use RESULT_EXTRA_SIZE as the estimated size of extra space here.
965          */
966 #define RESULT_EXTRA_SIZE       64
967         buflen = slen + RESULT_EXTRA_SIZE;
968
969 retry:
970         result = (text *) palloc(buflen + VARHDRSZ);
971
972 #if defined(FAST_SENNA)
973         reslen = fast_sen_str_normalize(s, slen, VARDATA(result), buflen);
974 #else
975         reslen = sen_str_normalize(s, slen, sen_enc_utf8,
976                                                            SEN_NORMALIZE_FLAGS,
977                                                            VARDATA(result), buflen);
978 #endif
979
980         if (reslen < 0)
981                 ereport(ERROR,
982                                 (errmsg("could not normalize the string")));
983
984         /*
985          * If the result buffer size is too short to store the normalized string,
986          * we enlarge the buffer and retry the string normalization.
987          */
988         if (buflen <= reslen)
989         {
990                 pfree(result);
991                 buflen = reslen + 1;
992                 goto retry;
993         }
994
995         SET_VARSIZE(result, reslen + VARHDRSZ);
996
997         /*
998          * Cache both input and normalized strings to accelerate the subsequent
999          * calls of pgs2norm() with the same input string. But we don't do that
1000          * if the maximum allowed size of the cache is too small to store them.
1001          */
1002         needed = slen + reslen + VARHDRSZ;
1003         maxlen = ((norm_cache_limit >= 0) ? norm_cache_limit : work_mem) * 1024L;
1004
1005         pgs2malloc((void **) &norm_cache, &norm_cache_size, needed, maxlen);
1006         if (norm_cache != NULL)
1007         {
1008                 /* Store the input string into the first half of the cache */
1009                 norm_slen = slen;
1010                 memcpy(norm_cache, s, slen);
1011
1012                 /*
1013                  * Store the normalized string with the varlena header (i.e., text type)
1014                  * into the latter half of the cache.
1015                  */
1016                 norm_result = norm_cache + slen;
1017                 norm_reslen = reslen + VARHDRSZ;
1018                 memcpy(norm_result, result, norm_reslen);
1019         }
1020
1021 #ifdef PGS2_DEBUG
1022         if (pgs2_enable_debug == PGS2_ENABLE_DEBUG_ON)
1023         {
1024                 char    *tmp = text_to_cstring(str);
1025
1026                 elog(LOG, "pgs2norm(): complete (%s result cache): %s",
1027                          (norm_cache == NULL) ? "unset" : "set", tmp);
1028                 pfree(tmp);
1029         }
1030         else if (pgs2_enable_debug == PGS2_ENABLE_DEBUG_TERSE)
1031                         elog(LOG, "pgs2norm(): complete");
1032 #endif
1033
1034         PG_RETURN_TEXT_P(result);
1035 }
1036
1037 /*
1038  * Report the version and configure options of Senna which
1039  * ludia_funcs depends on.
1040  */
1041 Datum
1042 pgs2seninfo(PG_FUNCTION_ARGS)
1043 {
1044         char    *version[MAXPGPATH];
1045         char    *coptions[MAXPGPATH];
1046         Datum   values[2];
1047         bool    isnull[2];
1048         HeapTuple tuple;
1049         TupleDesc tupdesc;
1050
1051         /*
1052          * Get the version and configure options of Senna. Ignore the
1053          * return value of sen_info() because it always returns a success.
1054          */
1055         sen_info((char **)&version, (char **)&coptions, NULL, NULL, NULL, NULL);
1056
1057         /*
1058          * Construct a tuple descriptor for the result row. This must
1059          * match this function's ludia_funcs--x.x.sql entry.
1060          */
1061 #if PG_VERSION_NUM >= 120000
1062         tupdesc = CreateTemplateTupleDesc(2);
1063 #else
1064         tupdesc = CreateTemplateTupleDesc(2, false);
1065 #endif
1066         TupleDescInitEntry(tupdesc, (AttrNumber) 1,
1067                                            "version", TEXTOID, -1, 0);
1068         TupleDescInitEntry(tupdesc, (AttrNumber) 2,
1069                                            "configure_options", TEXTOID, -1, 0);
1070         tupdesc = BlessTupleDesc(tupdesc);
1071
1072         /* version */
1073         values[0] = CStringGetTextDatum(*version);
1074         isnull[0] = false;
1075
1076         /* configure option */
1077         values[1] = CStringGetTextDatum(*coptions);
1078         isnull[1] = false;
1079
1080         tuple = heap_form_tuple(tupdesc, values, isnull);
1081         PG_RETURN_DATUM(HeapTupleGetDatum(tuple));
1082 }