1 /*-------------------------------------------------------------------------
3 * Copyright (c) 2016-2021, ludia_funcs Development Group
4 * Copyright (c) 2006-2015, NTT DATA Corporation
9 * Update Ludia functions so that they are available with PostgreSQL9.1.
10 * Author: NTT DATA Corporation
12 *-------------------------------------------------------------------------
17 #include <sys/types.h>
21 #include "catalog/pg_type.h"
24 #include "ludia_funcs.h"
25 #include "mb/pg_wchar.h"
27 #include "storage/fd.h"
28 #include "utils/builtins.h"
29 #include "utils/guc.h"
30 #include "miscadmin.h"
32 #if PG_VERSION_NUM >= 90300
33 #include "access/htup_details.h"
38 /* Last update date of ludia_funcs */
/* NOTE(review): presumably the value shown by the ludia_funcs.last_update setting -- confirm */
39 #define PGS2_LAST_UPDATE "2019.10.04"
/*
 * Debug verbosity levels.  These are the values that the entries of
 * pgs2_enable_debug_options map onto and that pgs2_enable_debug holds.
 */
43 typedef enum pgs2_enable_debug_type
45 PGS2_ENABLE_DEBUG_OFF, /* logs no debug log */
46 PGS2_ENABLE_DEBUG_TERSE, /* logs tersely, e.g., just names of
48 PGS2_ENABLE_DEBUG_ON /* logs detailed information */
49 } pgs2_enable_debug_type;
51 /* We accept all the likely variants of "on" and "off" */
/*
 * Entries are {name, value, flag}: "off", "terse" and "on" carry
 * flag = false, the remaining boolean-style spellings carry flag = true.
 * NOTE(review): the flag is presumably the "hidden" member of
 * struct config_enum_entry -- confirm against utils/guc.h.
 */
52 static const struct config_enum_entry pgs2_enable_debug_options[] = {
53 {"off", PGS2_ENABLE_DEBUG_OFF, false},
54 {"terse", PGS2_ENABLE_DEBUG_TERSE, false},
55 {"on", PGS2_ENABLE_DEBUG_ON, false},
56 {"true", PGS2_ENABLE_DEBUG_ON, true},
57 {"false", PGS2_ENABLE_DEBUG_OFF, true},
58 {"yes", PGS2_ENABLE_DEBUG_ON, true},
59 {"no", PGS2_ENABLE_DEBUG_OFF, true},
60 {"1", PGS2_ENABLE_DEBUG_ON, true},
61 {"0", PGS2_ENABLE_DEBUG_OFF, true},
/* Current debug verbosity; compared against the PGS2_ENABLE_DEBUG_* values */
65 static int pgs2_enable_debug = PGS2_ENABLE_DEBUG_OFF;
66 #endif /* PGS2_DEBUG */
/* NOTE(review): presumably the storage behind ludia_funcs.last_update -- confirm */
68 static char *pgs2_last_update = NULL;
/*
 * Upper limit, in kilobytes, of the pgs2norm() result cache.  A negative
 * value makes pgs2norm() use work_mem instead.
 */
69 static int norm_cache_limit = -1;
/* When true, GetSennaQuery() passes the keyword through EscapeSnippetKeyword() */
70 static bool escape_snippet_keyword = false;
/* Flags argument handed to sen_str_normalize() by pgs2norm() */
72 #define SEN_NORMALIZE_FLAGS 0
/* Expression limit handed to sen_query_open() by GetSennaQuery() */
73 #define SEN_MAX_N_EXPRS 32
75 /* upper limit for GUC variables measured in kilobytes of memory */
76 /* note that various places assume the byte size fits in a "long" variable */
77 #if SIZEOF_SIZE_T > 4 && SIZEOF_LONG > 4
78 #define MAX_KILOBYTES INT_MAX
80 #define MAX_KILOBYTES (INT_MAX / 1024)
/*
 * Single-byte character tests used by EscapeSnippetKeyword(); x is a
 * pointer to the character to examine.
 */
83 #define ISBACKSLASHCHAR(x) (*(x) == '\\')
84 #define ISDOUBLEQUOTECHAR(x) (*(x) == '"')
85 #define ISSENNAOPSCHAR(x) (*(x) == '+' || *(x) == '-' || *(x) == ' ')
/* Declare the four SQL-callable entry points provided by this file */
87 PG_FUNCTION_INFO_V1(pgs2snippet1);
88 PG_FUNCTION_INFO_V1(pgs2norm);
89 PG_FUNCTION_INFO_V1(pgs2textporter1);
90 PG_FUNCTION_INFO_V1(pgs2seninfo);
93 * The function prototypes are created as a part of PG_FUNCTION_INFO_V1
94 * macro since 9.4, and hence the declaration of the function prototypes
95 * here is necessary only for 9.3 or before.
97 #if PG_VERSION_NUM < 90400
98 Datum pgs2snippet1(PG_FUNCTION_ARGS);
99 Datum pgs2norm(PG_FUNCTION_ARGS);
100 Datum pgs2textporter1(PG_FUNCTION_ARGS);
101 Datum pgs2seninfo(PG_FUNCTION_ARGS);
/* Forward declarations of the file-local helpers used by pgs2snippet1() */
104 static sen_encoding GetSennaEncoding(void);
105 static sen_query *GetSennaQuery(char *str, size_t len);
106 static bool EscapeSnippetKeyword(char **s, size_t *slen);
/* Directory in which pgs2textporter1() creates its temporary output file */
109 #define TEXTPORTER_TMPDIR "/tmp"
/* umask applied around mkstemp(); 0177 leaves only owner read/write (0600) */
110 #define TEXTPORTER_MKSTEMP_UMASK 0177
/* Fixed arguments that pgs2textporter1() passes to ExecTextPorter() */
111 #define TEXTPORTER_GROUPNAME "UTF-8"
112 #define TEXTPORTER_DEFLANGNAME "Japanese"
113 #define TEXTPORTER_BBIGENDIAN 1
/* Initial value of the textporter_option variable */
114 #define TEXTPORTER_OPTION 0x00000020 /* DMC_GETTEXT_OPT_LF */
/*
 * String form handed to DefineCustomStringVariable() for
 * ludia_funcs.textporter_option; "32" is 0x00000020 written in decimal.
 */
115 #define TEXTPORTER_OPTION_STRING "32"
116 #define TEXTPORTER_OPTION1 0x00010000 /* DMC_GETTEXT_OPT1_TXCONV */
117 #define TEXTPORTER_SIZE 0
118 #define TEXTPORTER_CSV_C 0
121 /* GUC variables for pgs2textporter1 */
/* ereport() level used by pgs2textporter1() when TextPorter fails */
122 static int textporter_error = ERROR;
/* Option word passed to ExecTextPorter(); written by assign_textporter_option() */
123 static unsigned int textporter_option = TEXTPORTER_OPTION;
/* When true, pgs2textporter1() installs a SIGSEGV handler around the TextPorter call */
124 static bool textporter_exit_on_segv = false;
127 * This variable is a dummy that doesn't do anything, except in some
128 * cases provides the value for SHOW to display. The real state is
129 * elsewhere and is kept in sync by assign_hooks.
131 static char *textporter_option_string;
/* Message levels that can be selected for ludia_funcs.textporter_error */
133 static const struct config_enum_entry textporter_error_options[] = {
134 {"debug1", DEBUG1, false},
136 {"info", INFO, false},
137 {"notice", NOTICE, false},
138 {"warning", WARNING, false},
139 {"error", ERROR, false},
/*
 * Forward declarations of the TextPorter helpers: temporary-file cleanup,
 * the check/assign hooks of ludia_funcs.textporter_option, and the SIGSEGV
 * handler used by pgs2textporter1().
 */
143 static void CleanupTextPorterTmpFiles(void);
145 static bool check_textporter_option(char **newval, void **extra, GucSource source);
146 static void assign_textporter_option(const char *newval, void *extra);
147 static void textporter_exit_on_segv_handler(SIGNAL_ARGS);
148 #endif /* TEXTPORTER */
/*
 * Registers the ludia_funcs.* custom settings (enable_debug, last_update,
 * textporter_error, textporter_option, textporter_exit_on_segv,
 * norm_cache_limit, escape_snippet_keyword), removes leftover TextPorter
 * temporary files, and initializes Senna.
 */
159 /* Define custom GUC variable for debugging */
160 DefineCustomEnumVariable("ludia_funcs.enable_debug",
161 "Emit ludia_funcs debugging output.",
164 PGS2_ENABLE_DEBUG_OFF,
165 pgs2_enable_debug_options,
173 /* Can't be set in postgresql.conf */
174 DefineCustomStringVariable("ludia_funcs.last_update",
175 "Shows the last update date of ludia_funcs.",
180 GUC_REPORT | GUC_NOT_IN_SAMPLE | GUC_DISALLOW_IN_FILE,
186 /* Define custom GUC variables */
187 DefineCustomEnumVariable("ludia_funcs.textporter_error",
188 "Sets the message levels that are emitted "
189 "when textporter fails.",
193 textporter_error_options,
200 DefineCustomStringVariable("ludia_funcs.textporter_option",
201 "Sets the option used to get text data "
204 &textporter_option_string,
205 TEXTPORTER_OPTION_STRING,
208 check_textporter_option,
209 assign_textporter_option,
212 DefineCustomBoolVariable("ludia_funcs.textporter_exit_on_segv",
213 "Terminate session when textporter causes segmentation fault.",
215 &textporter_exit_on_segv,
223 /* Clean up remaining textporter temporary files */
224 CleanupTextPorterTmpFiles();
225 #endif /* TEXTPORTER */
228 * A value of 0 means no limit on the cache size. A value of -1 means
229 * that work_mem is used as the upper size limit of the cache.
231 DefineCustomIntVariable("ludia_funcs.norm_cache_limit",
232 "Sets the maximum memory to be used for caching "
233 "the result of pgs2norm()",
245 DefineCustomBoolVariable("ludia_funcs.escape_snippet_keyword",
246 "Escapes snippet keyword string.",
248 &escape_snippet_keyword,
256 EmitWarningsOnPlaceholders("ludia_funcs");
258 /* Initialize Senna */
/* Any result other than sen_success is reported together with the code */
260 if (rc != sen_success)
262 (errmsg("sen_init() failed: %d", rc)));
/*
 * Unlink the given path.  A failing unlink() is reported with the
 * errno-derived file-access error code and the path in the message.
 */
271 #define REMOVE_TMPFILE(path) \
273 if (unlink(path) != 0) \
275 (errcode_for_file_access(), \
276 errmsg("could not remove temporary file \"%s\": %m", path))); \
/*
 * pgs2textporter1
 *
 * Argument 0 (text) names an application file.  TextPorter is run on it
 * with its output directed to a mkstemp()-created file under
 * TEXTPORTER_TMPDIR; that file is then read back, removed, and its
 * contents returned as text.  A TextPorter failure is reported at the
 * level held in textporter_error.
 */
280 pgs2textporter1(PG_FUNCTION_ARGS)
282 char *appfile = text_to_cstring(PG_GETARG_TEXT_P(0));
283 char txtfile[] = TEXTPORTER_TMPDIR "/ludia_funcs_XXXXXX";
289 bool return_null = false;
292 /* Confirm that database encoding is UTF-8 */
298 * Generate a unique temporary filename where text data gotten
299 * from application file by TextPorter is stored temporarily.
300 * Set the permission of a temporary file to 0600 to ensure that
301 * only the owner of PostgreSQL server can read and write the file.
303 oumask = umask(TEXTPORTER_MKSTEMP_UMASK);
304 tmpfd = mkstemp(txtfile);
309 (errcode_for_file_access(),
310 errmsg("could not generate a unique temporary filename: %m")));
311 if (close(tmpfd) != 0)
313 (errcode_for_file_access(),
314 errmsg("could not close temporary file \"%s\": %m", txtfile)));
317 * If textporter_exit_on_segv option is enabled, segmentation fault
318 * caused by textporter will terminate only this connection and
319 * not lead to the server crash.
321 if (textporter_exit_on_segv)
322 pqsignal(SIGSEGV, textporter_exit_on_segv_handler);
325 * Run TextPorter to read text data from application file (appfile)
326 * to temporary file (txtfile).
328 ret = ExecTextPorter((unsigned char *)appfile,
329 (unsigned char *)txtfile,
330 (unsigned char *)TEXTPORTER_GROUPNAME,
331 (unsigned char *)TEXTPORTER_DEFLANGNAME,
332 TEXTPORTER_BBIGENDIAN, textporter_option,
333 TEXTPORTER_OPTION1, TEXTPORTER_SIZE,
/*
 * NOTE(review): the handler is reset to SIG_DFL rather than to whatever
 * was installed before the call -- confirm that this is intended.
 */
336 if (textporter_exit_on_segv)
337 pqsignal(SIGSEGV, SIG_DFL);
341 ereport(textporter_error,
342 (errmsg("could not get text from application file \"%s\"",
344 errdetail("DMC_GetText_V5() failed with errcode %d",
347 /* Return NULL if textporter_error is set to other than ERROR */
352 /* Read text data from temporary file to memory */
353 if (stat(txtfile, &statbuf))
355 (errcode_for_file_access(),
356 errmsg("could not stat file \"%s\": %m", txtfile)));
/* The result is sized from the temporary file's st_size plus the varlena header */
357 result = (text *) palloc(statbuf.st_size + VARHDRSZ);
359 fp = AllocateFile(txtfile, "r");
362 (errcode_for_file_access(),
363 errmsg("could not open file \"%s\": %m", txtfile)));
365 if (fread(VARDATA(result), 1, statbuf.st_size, fp) != statbuf.st_size ||
368 (errcode_for_file_access(),
369 errmsg("could not read file \"%s\": %m", txtfile)));
374 REMOVE_TMPFILE(txtfile);
379 REMOVE_TMPFILE(txtfile);
387 SET_VARSIZE(result, statbuf.st_size + VARHDRSZ);
389 PG_RETURN_TEXT_P(result);
393 * Clean up remaining textporter temporary files
/*
 * Scans TEXTPORTER_TMPDIR and removes every entry whose name is exactly
 * 18 characters long and starts with "ludia_funcs_", i.e. names shaped
 * like the "ludia_funcs_XXXXXX" template used by pgs2textporter1().
 * NOTE(review): the match is by name shape only; whether a matching file
 * could still be in use by another session is not checked here -- confirm.
 */
396 CleanupTextPorterTmpFiles(void)
400 char path[MAXPGPATH];
402 tpdir = AllocateDir(TEXTPORTER_TMPDIR);
405 (errcode_for_file_access(),
406 errmsg("could not open textporter temporary file directory \"%s\": %m",
407 TEXTPORTER_TMPDIR)));
409 while ((tpde = ReadDir(tpdir, TEXTPORTER_TMPDIR)) != NULL)
411 if (strlen(tpde->d_name) == 18 &&
412 strncmp(tpde->d_name, "ludia_funcs_", 12) == 0)
414 snprintf(path, MAXPGPATH, TEXTPORTER_TMPDIR "/%s", tpde->d_name);
415 REMOVE_TMPFILE(path);
/*
 * Check hook for ludia_funcs.textporter_option.
 *
 * Parses *newval with strtoul() using base 0, so decimal, octal (leading 0)
 * and hexadecimal (leading 0x) spellings are accepted.  Values that do not
 * fit in an unsigned int are rejected with a hint.  On success the parsed
 * value is stored in a malloc'd unsigned int handed back through *extra
 * for assign_textporter_option() to consume.
 */
423 check_textporter_option(char **newval, void **extra, GucSource source)
427 unsigned int *myextra;
430 val = strtoul(*newval, &endptr, 0);
435 if (errno == ERANGE || val != (unsigned long) ((unsigned int) val))
437 GUC_check_errhint("Value exceeds unsigned integer range.");
441 /* Set up the "extra" struct actually used by assign_textporter_option */
442 myextra = (unsigned int *) malloc(sizeof(unsigned int));
445 GUC_check_errcode(ERRCODE_OUT_OF_MEMORY);
446 GUC_check_errmsg("out of memory");
449 *myextra = (unsigned int) val;
450 *extra = (void *) myextra;
/*
 * Assign hook for ludia_funcs.textporter_option: copies the unsigned int
 * prepared by check_textporter_option() (passed via extra) into
 * textporter_option.  newval is not used by the visible code.
 */
456 assign_textporter_option(const char *newval, void *extra)
458 textporter_option = *((unsigned int *) extra);
/*
 * SIGSEGV handler that pgs2textporter1() installs around the TextPorter
 * call while textporter_exit_on_segv is enabled.  It reports the fault
 * with ERRCODE_INTERNAL_ERROR.
 * NOTE(review): error reporting from inside a signal handler is generally
 * not async-signal-safe -- confirm this is an accepted trade-off.
 */
462 textporter_exit_on_segv_handler(SIGNAL_ARGS)
465 (errcode(ERRCODE_INTERNAL_ERROR),
466 errmsg("terminating PostgreSQL server process due to "
467 "segmentation fault by textporter")));
470 #else /* TEXTPORTER */
/* Variant of pgs2textporter1() compiled when TEXTPORTER is not defined */
473 pgs2textporter1(PG_FUNCTION_ARGS)
478 #endif /* TEXTPORTER */
/*
 * Determine the Senna encoding that corresponds to the database encoding.
 *
 * The answer is kept in a function-local static, so the database encoding
 * is examined only while that static still holds sen_enc_default.  UTF-8
 * maps to sen_enc_utf8; any other database encoding is reported as
 * ERRCODE_FEATURE_NOT_SUPPORTED.
 */
481 GetSennaEncoding(void)
483 static sen_encoding encoding = sen_enc_default;
485 if (encoding == sen_enc_default)
487 if (GetDatabaseEncoding() == PG_UTF8)
488 encoding = sen_enc_utf8;
491 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
492 errmsg("does not support database encoding \"%s\"",
493 GetDatabaseEncodingName())));
499 * Escape the backslash and double quote characters in the given string.
501 * Return false if the given string has no character which needs to be
502 * escaped. Otherwise, return true. In this case, *s points to the palloc'd
503 * space storing the escaped keyword string and *slen is set to the size
504 * of that string. The caller needs to free the palloc'd space.
/*
 * Implementation outline: the input is walked twice, one multibyte
 * character (pg_mblen) at a time.  The first loop only looks for a
 * character that needs escaping; the second loop builds the escaped copy
 * in a buffer of twice the input length, starting from the position where
 * the first loop stopped.  The flags in_doublequote and in_sennaops carry
 * the state that links a double quote to a neighbouring Senna operator
 * character ('+', '-' or ' ').
 */
507 EscapeSnippetKeyword(char **s, size_t *slen)
514 bool in_doublequote = false;
515 bool in_sennaops = false;
516 bool need_escape = false;
519 * Skip the heading double quote character because it always doesn't
520 * need to be interpreted as a character itself and be escaped.
521 * Note that we must not skip the heading character if it's not a
525 if (ISDOUBLEQUOTECHAR(sp))
529 * Check whether the snippet keyword string has a character which
530 * needs to be escaped.
532 while ((sp - *s) < *slen)
534 mblen = pg_mblen(sp);
537 * Backslash in the keyword always needs to be escaped.
539 if (ISBACKSLASHCHAR(sp))
547 if (ISSENNAOPSCHAR(sp))
550 in_doublequote = false;
555 * Double quote in the keyword needs to be escaped if
556 * any Senna search operators are to neither its right
565 if (ISDOUBLEQUOTECHAR(sp) && !in_sennaops)
566 in_doublequote = true;
567 if (!ISSENNAOPSCHAR(sp))
575 * Quick exit if the keyword has no character which needs to be
582 * Allocate the buffer space to store the escaped snippet keyword string.
583 * The maximum size of escaped string is double the input keyword size.
584 * The size reaches the maximum when every character in the input keyword
585 * needs to be escaped.
587 ep = escaped = (char *) palloc(*slen * 2);
590 * Copy the characters which have been passed through in the above loop
591 * and don't need to be escaped, into the buffer. If in_doublequote is
592 * true, we don't copy the double quote in the previous position into the
593 * buffer because it might still need to be escaped.
595 copylen = sp - *s - ((in_doublequote) ? 1 : 0);
596 memcpy(ep, *s, copylen);
600 * Construct the escaped snippet keyword string.
602 while ((sp - *s) < *slen)
604 mblen = pg_mblen(sp);
609 * dqchar indicates the previous character, that is a double
610 * quote. We assume here that a double quote is single-byte
613 char dqchar = *(sp - 1);
615 if (ISSENNAOPSCHAR(sp))
618 * Don't escape the double quote which is just before Senna
624 in_doublequote = false;
629 * Escape the double quote if no Senna operator is next to it.
634 if (ISDOUBLEQUOTECHAR(sp))
635 in_doublequote = true;
638 if (ISBACKSLASHCHAR(sp))
640 memcpy(ep, sp, mblen);
642 in_doublequote = false;
648 if (ISDOUBLEQUOTECHAR(sp))
651 * Don't escape the double quote which is just after Senna
657 in_doublequote = true;
661 if (ISBACKSLASHCHAR(sp))
664 * We don't check ISSENNAOPSCHAR() here. We handle Senna
665 * operator character as a character itself instead of
666 * an operator if it doesn't follow a double quote.
668 memcpy(ep, sp, mblen);
672 if (!ISSENNAOPSCHAR(sp))
679 /* Add the trailing double quote into the buffer */
687 if (pgs2_enable_debug == PGS2_ENABLE_DEBUG_ON)
689 char *tmp = pnstrdup(*s, *slen);
691 elog(LOG, "escaped snippet keyword: %s", tmp);
694 else if (pgs2_enable_debug == PGS2_ENABLE_DEBUG_TERSE)
695 elog(LOG, "escaped snippet keyword");
/*
 * Return a Senna query for the keyword str of length len.
 *
 * The most recently built query is remembered in function-local statics
 * together with the unescaped keyword, its length and the value that
 * escape_snippet_keyword had at that time.  A call whose keyword and
 * escape setting match the remembered ones takes the quick-exit path.
 * Otherwise a new query is opened with sen_query_open() (after escaping
 * the keyword when escape_snippet_keyword is set), a non-zero
 * sen_query_rest() is reported as too many expressions, and the previously
 * cached query is closed.
 */
702 GetSennaQuery(char *str, size_t len)
704 static sen_query *query_cache = NULL;
705 static char *key_cache = NULL;
706 static size_t len_cache = 0;
707 static bool guc_cache = false;
709 sen_encoding encoding;
713 bool needfree = false;
716 * Return the cached Senna query if the same keyword has
717 * been used the last time.
719 if (key_cache != NULL &&
721 strncmp(key_cache, str, len) == 0 &&
722 escape_snippet_keyword == guc_cache)
725 if (pgs2_enable_debug == PGS2_ENABLE_DEBUG_ON)
727 char *tmp = pnstrdup(str, len);
729 elog(LOG, "GetSennaQuery(): quick exit: %s", tmp);
732 else if (pgs2_enable_debug == PGS2_ENABLE_DEBUG_TERSE)
733 elog(LOG, "GetSennaQuery(): quick exit");
738 encoding = GetSennaEncoding();
743 (errcode(ERRCODE_OUT_OF_MEMORY),
744 errmsg("out of memory")));
747 * We always cache the unescaped keyword. Which enables us
748 * to check whether we can use the cached Senna query before
749 * escaping the keyword.
751 memcpy(key, str, len);
755 * If the keyword has been escaped, 'str' points to the
756 * newly-palloc'd space storing the escaped keyword. This
757 * space needs to be freed later.
759 if (escape_snippet_keyword)
760 needfree = EscapeSnippetKeyword(&str, &len);
762 query = sen_query_open(str, len, sen_sel_or, SEN_MAX_N_EXPRS,
768 (errmsg("sen_query_open() failed")));
771 if ((rest = sen_query_rest(query, NULL)) != 0)
773 (errmsg("too many expressions (%d)", rest)));
775 if (query_cache != NULL)
777 sen_query_close(query_cache);
784 guc_cache = escape_snippet_keyword;
/*
 * pgs2snippet1
 *
 * SQL arguments, in order: flags (int4), width, max_results, opentags
 * (text), closetags (text), mapping (int4), keywords (text), document
 * (text).  A Senna query is obtained for the keywords via GetSennaQuery(),
 * a snippet object is built from it with one open/close tag pair, the
 * document is run through sen_snip_exec(), and the snippet at index 0 is
 * fetched into a text value.  A mapping of 0 passes NULL to
 * sen_query_snip(); any other value passes (sen_snip_mapping *)-1.
 */
793 pgs2snippet1(PG_FUNCTION_ARGS)
795 int flags = PG_GETARG_INT32(0);
796 uint32 width = PG_GETARG_UINT32(1);
797 uint32 max_results = PG_GETARG_UINT32(2);
798 text *opentags = PG_GETARG_TEXT_P(3);
799 text *closetags = PG_GETARG_TEXT_P(4);
800 int mapping = PG_GETARG_INT32(5);
801 text *keywords = PG_GETARG_TEXT_P(6);
802 text *document = PG_GETARG_TEXT_P(7);
804 sen_snip *snip = NULL;
805 const char *opentags_str = VARDATA_ANY(opentags);
806 const char *closetags_str = VARDATA_ANY(closetags);
807 char *keywords_str = VARDATA_ANY(keywords);
808 char *document_str = VARDATA_ANY(document);
809 uint32 opentags_len = VARSIZE_ANY_EXHDR(opentags);
810 uint32 closetags_len = VARSIZE_ANY_EXHDR(closetags);
811 uint32 keywords_len = VARSIZE_ANY_EXHDR(keywords);
812 uint32 document_len = VARSIZE_ANY_EXHDR(document);
814 uint32 max_tagged_len = 0;
817 uint32 result_len = 0;
818 bool return_null = false;
820 query = GetSennaQuery(keywords_str, keywords_len);
822 snip = sen_query_snip(query, flags, width, max_results, 1,
823 &opentags_str, &opentags_len,
824 &closetags_str, &closetags_len,
825 mapping == 0 ? NULL : (sen_snip_mapping *)-1);
828 (errmsg("sen_query_snip() failed")));
832 rc = sen_snip_exec(snip, document_str, document_len,
833 &nresults, &max_tagged_len);
834 if (rc != sen_success)
836 (errmsg("sen_snip_exec() failed: %d", rc)));
/* The buffer is sized for the longest tagged snippet reported by sen_snip_exec() */
838 result = (text *) palloc(max_tagged_len + VARHDRSZ);
840 rc = sen_snip_get_result(snip, 0, VARDATA(result), &result_len);
841 if (rc == sen_invalid_argument)
843 else if (rc != sen_success)
845 (errmsg("sen_snip_get_result() failed: %d", rc)));
849 sen_snip_close(snip);
854 sen_snip_close(snip);
/*
 * NOTE(review): sen_snip_get_result() fills result_len, yet the varlena
 * size below is derived from max_tagged_len -- confirm that the two are
 * guaranteed to agree for the snippet at index 0.
 */
859 SET_VARSIZE(result, max_tagged_len + VARHDRSZ);
861 PG_RETURN_TEXT_P(result);
865 * Make sure there is enough space for 'needed' more bytes.
867 * Sets **buf to the allocated space which can store the needed bytes if OK,
868 * NULL if failed to enlarge the space because 'needed' is larger than 'maxlen'.
/*
 * Parameters:
 *   buf     - in/out pointer to the buffer
 *   buflen  - in/out size of *buf in bytes
 *   needed  - number of bytes the caller wants to store
 *   maxlen  - upper size limit in bytes; 0 disables the limit (see the
 *             "maxlen == 0" and "maxlen != 0" tests below)
 *
 * The buffer is obtained with malloc(), not palloc().  A failed malloc()
 * is reported as ERRCODE_OUT_OF_MEMORY.
 */
871 pgs2malloc(void **buf, long *buflen, long needed, long maxlen)
874 if (pgs2_enable_debug == PGS2_ENABLE_DEBUG_ON)
875 elog(LOG, "pgs2malloc(): buflen %ld, needed %ld, maxlen %ld",
876 *buflen, needed, maxlen);
877 else if (pgs2_enable_debug == PGS2_ENABLE_DEBUG_TERSE)
878 elog(LOG, "pgs2malloc()");
881 if (*buf != NULL && *buflen >= needed && (*buflen <= maxlen || maxlen == 0))
882 return; /* got enough space already */
885 * Release the already-allocated space since it's too small to
886 * store the needed bytes or larger than the upper limit.
896 * Don't allocate any space if the needed space is larger than
899 if (needed > maxlen && maxlen != 0)
903 * Allocate the space for the needed bytes.
905 * We don't want to allocate just a little more space with each enlarge;
906 * for efficiency, double the buffer size each time it overflows.
907 * Actually, we might need to more than double it if 'needed' is big...
909 * We check whether '*buflen' overflows each cycle to avoid infinite loop.
912 while (*buflen < needed && *buflen != 0)
916 * Clamp to maxlen in case we went past it. Note we are assuming
917 * here that maxlen <= LONG_MAX/2, else the above loop could
918 * overflow. We will still have *buflen >= needed.
920 if (*buflen > maxlen && maxlen != 0)
923 /* Guard against out-of-range '*buflen' value */
926 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
927 errmsg("out of memory"),
928 errdetail("Cannot enlarge buffer by %ld more bytes.",
931 *buf = (void *) malloc(*buflen);
934 (errcode(ERRCODE_OUT_OF_MEMORY),
935 errmsg("out of memory")));
/*
 * pgs2norm
 *
 * Normalizes the text argument with sen_str_normalize() (or
 * fast_sen_str_normalize() when FAST_SENNA is defined) and returns the
 * normalized string as text.  The most recent input/result pair is kept in
 * a static buffer managed through pgs2malloc(); a call that matches the
 * cached input returns a copy of the cached result without normalizing
 * again.  The cache limit is norm_cache_limit kilobytes, or work_mem
 * kilobytes when norm_cache_limit is negative.
 */
939 pgs2norm(PG_FUNCTION_ARGS)
941 text *str = PG_GETARG_TEXT_PP(0);
942 char *s = VARDATA_ANY(str);
943 long slen = VARSIZE_ANY_EXHDR(str);
951 * norm_cache is the cache memory storing both input and normalized strings
952 * as the result of pgs2norm(). norm_cache_size is the size of norm_cache
953 * and its upper limit is specified by norm_cache_limit parameter. norm_result
954 * is the pointer to the normalized string with the verlena header (i.e.,
955 * text type) stored in the latter half of the cache. norm_reslen is the size
956 * of norm_result. norm_slen is the size of the input string which is stored
957 * in the first half of the cache.
959 static char *norm_cache = NULL;
960 static long norm_cache_size = 0;
961 static long norm_slen = 0;
962 static char *norm_result = NULL;
963 static long norm_reslen = 0;
966 * Return the cached normalization result if the same string of
967 * the given one has been normalized the last time.
969 if (norm_cache != NULL &&
971 strncmp(norm_cache, s, slen) == 0)
974 if (pgs2_enable_debug == PGS2_ENABLE_DEBUG_ON)
976 char *tmp = text_to_cstring(str);
978 elog(LOG, "pgs2norm(): quick exit: %s", tmp);
981 else if (pgs2_enable_debug == PGS2_ENABLE_DEBUG_TERSE)
982 elog(LOG, "pgs2norm(): quick exit");
/* Return a fresh palloc copy so the caller never receives a pointer into the cache */
985 result = (text *) palloc(norm_reslen);
986 memcpy(result, norm_result, norm_reslen);
987 PG_RETURN_TEXT_P(result);
990 /* Confirm that database encoding is UTF-8 */
994 * Allocate the result buffer to store the normalized string. Since the size of
995 * normalized string can be larger than that of input one, the result buffer needs
996 * extra space. Problem is that, before calling sen_str_normalize, we need to
997 * allocate the result buffer but cannot know how large extra space is required.
998 * So we use RESULT_EXTRA_SIZE as the estimated size of extra space here.
1000 #define RESULT_EXTRA_SIZE 64
1001 buflen = slen + RESULT_EXTRA_SIZE;
1004 result = (text *) palloc(buflen + VARHDRSZ);
1006 #if defined(FAST_SENNA)
1007 reslen = fast_sen_str_normalize(s, slen, VARDATA(result), buflen);
1009 reslen = sen_str_normalize(s, slen, sen_enc_utf8,
1010 SEN_NORMALIZE_FLAGS,
1011 VARDATA(result), buflen);
1016 (errmsg("could not normalize the string")));
1019 * If the result buffer size is too short to store the normalized string,
1020 * we enlarge the buffer and retry the string normalization.
1022 if (buflen <= reslen)
1025 buflen = reslen + 1;
1029 SET_VARSIZE(result, reslen + VARHDRSZ);
1032 * Cache both input and normalized strings to accelerate the subsequent
1033 * calls of pgs2norm() with the same input string. But we don't do that
1034 * if the maximum allowed size of the cache is too small to store them.
1036 needed = slen + reslen + VARHDRSZ;
1037 maxlen = ((norm_cache_limit >= 0) ? norm_cache_limit : work_mem) * 1024L;
1039 pgs2malloc((void **) &norm_cache, &norm_cache_size, needed, maxlen);
1040 if (norm_cache != NULL)
1042 /* Store the input string into the first half of the cache */
1044 memcpy(norm_cache, s, slen);
1047 * Store the normalized string with the varlena header (i.e., text type)
1048 * into the latter half of the cache.
1050 norm_result = norm_cache + slen;
1051 norm_reslen = reslen + VARHDRSZ;
1052 memcpy(norm_result, result, norm_reslen);
1056 if (pgs2_enable_debug == PGS2_ENABLE_DEBUG_ON)
1058 char *tmp = text_to_cstring(str);
1060 elog(LOG, "pgs2norm(): complete (%s result cache): %s",
1061 (norm_cache == NULL) ? "unset" : "set", tmp);
1064 else if (pgs2_enable_debug == PGS2_ENABLE_DEBUG_TERSE)
1065 elog(LOG, "pgs2norm(): complete");
1068 PG_RETURN_TEXT_P(result);
1072 * Report the version and configure options of Senna which
1073 * ludia_funcs depends on.
/*
 * pgs2seninfo
 *
 * Returns one composite row with two text columns, "version" and
 * "configure_options", filled from the strings that sen_info() hands back.
 * The tuple descriptor is built by hand and blessed before the tuple is
 * formed.
 * NOTE(review): version and coptions are declared as arrays of MAXPGPATH
 * char pointers although only element 0 of each is read; a single char *
 * each looks sufficient -- confirm against the sen_info() prototype.
 */
1076 pgs2seninfo(PG_FUNCTION_ARGS)
1078 char *version[MAXPGPATH];
1079 char *coptions[MAXPGPATH];
1086 * Get the version and configure options of Senna. Ignore the
1087 * return value of sen_info() because it always returns a success.
1089 sen_info((char **)&version, (char **)&coptions, NULL, NULL, NULL, NULL);
1092 * Construct a tuple descriptor for the result row. This must
1093 * match this function's ludia_funcs--x.x.sql entry.
1095 #if PG_VERSION_NUM >= 120000
1096 tupdesc = CreateTemplateTupleDesc(2);
1098 tupdesc = CreateTemplateTupleDesc(2, false);
1100 TupleDescInitEntry(tupdesc, (AttrNumber) 1,
1101 "version", TEXTOID, -1, 0);
1102 TupleDescInitEntry(tupdesc, (AttrNumber) 2,
1103 "configure_options", TEXTOID, -1, 0);
1104 tupdesc = BlessTupleDesc(tupdesc);
1107 values[0] = CStringGetTextDatum(*version);
1110 /* configure option */
1111 values[1] = CStringGetTextDatum(*coptions);
1114 tuple = heap_form_tuple(tupdesc, values, isnull);
1115 PG_RETURN_DATUM(HeapTupleGetDatum(tuple));