OSDN Git Service

Update BIGM_LAST_UPDATE to '2020.02.28'.
[pgbigm/pg_bigm.git] / bigm_op.c
index 8556740..a9ae73a 100644 (file)
--- a/bigm_op.c
+++ b/bigm_op.c
@@ -1,12 +1,13 @@
 /*-------------------------------------------------------------------------
  *
+ * Portions Copyright (c) 2017-2020, pg_bigm Development Group
+ * Portions Copyright (c) 2013-2016, NTT DATA Corporation
  * Portions Copyright (c) 2004-2012, PostgreSQL Global Development Group
- * Portions Copyright (c) 2013-2015, NTT DATA Corporation
  *
  * Changelog:
- *   2013/01/09
- *   Support full text search using bigrams.
- *   Author: NTT DATA Corporation
+ *      2013/01/09
+ *      Support full text search using bigrams.
+ *      Author: NTT DATA Corporation
  *
  *-------------------------------------------------------------------------
  */
 #include "catalog/pg_type.h"
 #include "tsearch/ts_locale.h"
 #include "utils/array.h"
-
+#include "utils/memutils.h"
 
 PG_MODULE_MAGIC;
 
 /* Last update date of pg_bigm */
-#define        BIGM_LAST_UPDATE        "2013.11.22"
+#define BIGM_LAST_UPDATE       "2020.02.28"
 
 /* GUC variable */
-bool   bigm_enable_recheck = false;
-int            bigm_gin_key_limit = 0;
-double bigm_similarity_limit = 0.3;
-char   *bigm_last_update = NULL;
+bool           bigm_enable_recheck = false;
+int                    bigm_gin_key_limit = 0;
+double         bigm_similarity_limit = 0.3;
+char      *bigm_last_update = NULL;
 
 PG_FUNCTION_INFO_V1(show_bigm);
-Datum          show_bigm(PG_FUNCTION_ARGS);
-
 PG_FUNCTION_INFO_V1(bigmtextcmp);
-Datum          bigmtextcmp(PG_FUNCTION_ARGS);
-
 PG_FUNCTION_INFO_V1(likequery);
-Datum          likequery(PG_FUNCTION_ARGS);
-
 PG_FUNCTION_INFO_V1(bigm_similarity);
-Datum          bigm_similarity(PG_FUNCTION_ARGS);
-
 PG_FUNCTION_INFO_V1(bigm_similarity_op);
+
+/*
+ * Since PostgreSQL 9.4 the PG_FUNCTION_INFO_V1 macro also declares the
+ * function prototype, so the explicit prototypes below are needed only
+ * when building against 9.3 or earlier.
+ */
+#if PG_VERSION_NUM < 90400
+Datum          show_bigm(PG_FUNCTION_ARGS);
+Datum          bigmtextcmp(PG_FUNCTION_ARGS);
+Datum          likequery(PG_FUNCTION_ARGS);
+Datum          bigm_similarity(PG_FUNCTION_ARGS);
 Datum          bigm_similarity_op(PG_FUNCTION_ARGS);
+#endif
 
 void           _PG_init(void);
 void           _PG_fini(void);
@@ -115,8 +120,8 @@ _PG_fini(void)
 static int
 comp_bigm(const void *a, const void *b, void *arg)
 {
-       int             res;
-       bool    *haveDups = (bool *) arg;
+       int                     res;
+       bool       *haveDups = (bool *) arg;
 
        res = CMPBIGM(a, b);
 
@@ -175,7 +180,7 @@ find_word(char *str, int lenstr, char **endword, int *charlen)
        return beginword;
 }
 
-/* 
+/*
  * The function is named compact_bigram to maintain consistency with pg_trgm,
  * though it does not reduce multibyte characters to hash values like in
  * compact_trigram.
@@ -247,7 +252,17 @@ generate_bigm(char *str, int slen)
        char       *bword,
                           *eword;
 
-       bgm = (BIGM *) palloc(VARHDRSZ + sizeof(bigm) * (Size) (slen / 2 + 1) * 3);
+       /*
+        * Reject lengths that would push the palloc requests below past
+        * MaxAllocSize; this also keeps the size arithmetic from overflowing.
+        */
+       if ((Size) slen > (MaxAllocSize - VARHDRSZ) / sizeof(bigm) - 1 ||
+               (Size) slen > MaxAllocSize - 4)
+               ereport(ERROR,
+                               (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+                                errmsg("out of memory")));
+
+       bgm = (BIGM *) palloc(VARHDRSZ + sizeof(bigm) * (slen + 1));
        SET_VARSIZE(bgm, VARHDRSZ);
 
        if (slen + LPADDING + RPADDING < 2 || slen == 0)
@@ -277,7 +292,7 @@ generate_bigm(char *str, int slen)
                 * count bigrams
                 */
                bptr = make_bigrams(bptr, buf, bytelen + LPADDING + RPADDING,
-                                                        charlen + LPADDING + RPADDING);
+                                                       charlen + LPADDING + RPADDING);
        }
 
        pfree(buf);
@@ -290,7 +305,7 @@ generate_bigm(char *str, int slen)
         */
        if (len > 1)
        {
-               bool    haveDups = false;
+               bool            haveDups = false;
 
                qsort_arg((void *) GETARR(bgm), len, sizeof(bigm), comp_bigm, (void *) &haveDups);
                if (haveDups)
@@ -303,7 +318,7 @@ generate_bigm(char *str, int slen)
 }
 
 /*
- * Extract the next non-wildcard part of a search string, ie, a word bounded
+ * Extract the next non-wildcard part of a search string, i.e. a word bounded
  * by '_' or '%' meta-characters, non-word characters or string end.
  *
  * str: source string, of length lenstr bytes (need not be null-terminated)
@@ -324,8 +339,8 @@ get_wildcard_part(const char *str, int lenstr,
        const char *beginword = str;
        const char *endword;
        char       *s = buf;
-       bool        in_leading_wildcard_meta = false;
-       bool        in_trailing_wildcard_meta = false;
+       bool            in_leading_wildcard_meta = false;
+       bool            in_trailing_wildcard_meta = false;
        bool            in_escape = false;
        int                     clen;
 
@@ -402,8 +417,8 @@ get_wildcard_part(const char *str, int lenstr,
                        else
                        {
                                /*
-                                * Back up endword to the escape character when stopping at
-                                * an escaped char, so that subsequent get_wildcard_part will
+                                * Back up endword to the escape character when stopping at an
+                                * escaped char, so that subsequent get_wildcard_part will
                                 * restart from the escape character.  We assume here that
                                 * escape chars are single-byte.
                                 */
@@ -477,7 +492,17 @@ generate_wildcard_bigm(const char *str, int slen, bool *removeDups)
 
        *removeDups = false;
 
-       bgm = (BIGM *) palloc(VARHDRSZ + sizeof(bigm) * (Size) (slen / 2 + 1) * 3);
+       /*
+        * Reject lengths that would push the palloc requests below past
+        * MaxAllocSize; this also keeps the size arithmetic from overflowing.
+        */
+       if ((Size) slen > (MaxAllocSize - VARHDRSZ) / sizeof(bigm) - 1 ||
+               (Size) slen > MaxAllocSize - 4)
+               ereport(ERROR,
+                               (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+                                errmsg("out of memory")));
+
+       bgm = (BIGM *) palloc(VARHDRSZ + sizeof(bigm) * (slen + 1));
        SET_VARSIZE(bgm, VARHDRSZ);
 
        if (slen + LPADDING + RPADDING < 2 || slen == 0)
@@ -510,7 +535,7 @@ generate_wildcard_bigm(const char *str, int slen, bool *removeDups)
         */
        if (len > 1)
        {
-               bool    haveDups = false;
+               bool            haveDups = false;
 
                qsort_arg((void *) GETARR(bgm), len, sizeof(bigm), comp_bigm, (void *) &haveDups);
                if (haveDups)
@@ -541,6 +566,7 @@ show_bigm(PG_FUNCTION_ARGS)
        for (i = 0, ptr = GETARR(bgm); i < ARRNELEM(bgm); i++, ptr++)
        {
                text       *item = cstring_to_text_with_len(ptr->str, ptr->bytelen);
+
                d[i] = PointerGetDatum(item);
        }
 
@@ -584,7 +610,7 @@ cnt_sml_bigm(BIGM *bgm1, BIGM *bgm2)
 
        while (ptr1 - GETARR(bgm1) < len1 && ptr2 - GETARR(bgm2) < len2)
        {
-               int             res = CMPBIGM(ptr1, ptr2);
+               int                     res = CMPBIGM(ptr1, ptr2);
 
                if (res < 0)
                        ptr1++;
@@ -682,34 +708,15 @@ likequery(PG_FUNCTION_ARGS)
        PG_RETURN_TEXT_P(result);
 }
 
-inline int
-bigmstrcmp(char *arg1, int len1, char *arg2, int len2)
-{
-       int                     i;
-       int                     len = Min(len1, len2);
-
-       for (i = 0; i < len; i++, arg1++, arg2++)
-       {
-               if (*arg1 == *arg2)
-                       continue;
-               if (*arg1 < *arg2)
-                       return -1;
-               else
-                       return 1;
-       }
-
-       return (len1 == len2) ? 0 : ((len1 < len2) ? -1 : 1);
-}
-
 Datum
 bigmtextcmp(PG_FUNCTION_ARGS)
 {
-       text    *arg1 = PG_GETARG_TEXT_PP(0);
-       text    *arg2 = PG_GETARG_TEXT_PP(1);
-       char    *a1p = VARDATA_ANY(arg1);
-       char    *a2p = VARDATA_ANY(arg2);
-       int             len1 = VARSIZE_ANY_EXHDR(arg1);
-       int             len2 = VARSIZE_ANY_EXHDR(arg2);
+       text       *arg1 = PG_GETARG_TEXT_PP(0);
+       text       *arg2 = PG_GETARG_TEXT_PP(1);
+       char       *a1p = VARDATA_ANY(arg1);
+       char       *a2p = VARDATA_ANY(arg2);
+       int                     len1 = VARSIZE_ANY_EXHDR(arg1);
+       int                     len2 = VARSIZE_ANY_EXHDR(arg2);
 
        PG_RETURN_INT32(bigmstrcmp(a1p, len1, a2p, len2));
 }