Update BIGM_LAST_UPDATE to '2020.02.28'.

[pgbigm/pg_bigm.git] / bigm_op.c
diff --git a/bigm_op.c b/bigm_op.c

index 8556740..a9ae73a 100644 (file)
--- a/bigm_op.c
+++ b/bigm_op.c
@@ -1,12 +1,13 @@
  /*-------------------------------------------------------------------------
   *
+ * Portions Copyright (c) 2017-2020, pg_bigm Development Group
+ * Portions Copyright (c) 2013-2016, NTT DATA Corporation
   * Portions Copyright (c) 2004-2012, PostgreSQL Global Development Group
- * Portions Copyright (c) 2013-2015, NTT DATA Corporation
   *
   * Changelog:
- *   2013/01/09
- *   Support full text search using bigrams.
- *   Author: NTT DATA Corporation
+ *      2013/01/09
+ *      Support full text search using bigrams.
+ *      Author: NTT DATA Corporation
   *
   *-------------------------------------------------------------------------
   */
@@ -19,33 +20,37 @@
  #include "catalog/pg_type.h"
  #include "tsearch/ts_locale.h"
  #include "utils/array.h"
-
+#include "utils/memutils.h"
  
  PG_MODULE_MAGIC;
  
  /* Last update date of pg_bigm */
-#define        BIGM_LAST_UPDATE        "2013.11.22"
+#define BIGM_LAST_UPDATE       "2020.02.28"
  
  /* GUC variable */
-bool   bigm_enable_recheck = false;
-int            bigm_gin_key_limit = 0;
-double bigm_similarity_limit = 0.3;
-char   *bigm_last_update = NULL;
+bool           bigm_enable_recheck = false;
+int                    bigm_gin_key_limit = 0;
+double         bigm_similarity_limit = 0.3;
+char      *bigm_last_update = NULL;
  
  PG_FUNCTION_INFO_V1(show_bigm);
-Datum          show_bigm(PG_FUNCTION_ARGS);
-
  PG_FUNCTION_INFO_V1(bigmtextcmp);
-Datum          bigmtextcmp(PG_FUNCTION_ARGS);
-
  PG_FUNCTION_INFO_V1(likequery);
-Datum          likequery(PG_FUNCTION_ARGS);
-
  PG_FUNCTION_INFO_V1(bigm_similarity);
-Datum          bigm_similarity(PG_FUNCTION_ARGS);
-
  PG_FUNCTION_INFO_V1(bigm_similarity_op);
+
+/*
+ * Since PostgreSQL 9.4 the PG_FUNCTION_INFO_V1 macro itself declares the
+ * function prototype, so the explicit prototypes below are needed only
+ * when building against 9.3 or earlier.
+ */
+#if PG_VERSION_NUM < 90400
+Datum          show_bigm(PG_FUNCTION_ARGS);
+Datum          bigmtextcmp(PG_FUNCTION_ARGS);
+Datum          likequery(PG_FUNCTION_ARGS);
+Datum          bigm_similarity(PG_FUNCTION_ARGS);
  Datum          bigm_similarity_op(PG_FUNCTION_ARGS);
+#endif
  
  void           _PG_init(void);
  void           _PG_fini(void);
@@ -115,8 +120,8 @@ _PG_fini(void)
  static int
  comp_bigm(const void *a, const void *b, void *arg)
  {
-       int             res;
-       bool    *haveDups = (bool *) arg;
+       int                     res;
+       bool       *haveDups = (bool *) arg;
  
         res = CMPBIGM(a, b);
  
@@ -175,7 +180,7 @@ find_word(char *str, int lenstr, char **endword, int *charlen)
         return beginword;
  }
  
-/* 
+/*
   * The function is named compact_bigram to maintain consistency with pg_trgm,
   * though it does not reduce multibyte characters to hash values like in
   * compact_trigram.
@@ -247,7 +252,17 @@ generate_bigm(char *str, int slen)
         char       *bword,
                            *eword;
  
-       bgm = (BIGM *) palloc(VARHDRSZ + sizeof(bigm) * (Size) (slen / 2 + 1) * 3);
+       /*
+        * Guard against possible overflow in the palloc requests below.
+        * We need to prevent integer overflow in the multiplications here.
+        */
+       if ((Size) slen > (MaxAllocSize - VARHDRSZ) / sizeof(bigm) - 1 ||
+               (Size) slen > MaxAllocSize - 4)
+               ereport(ERROR,
+                               (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+                                errmsg("out of memory")));
+
+       bgm = (BIGM *) palloc(VARHDRSZ + sizeof(bigm) * (slen + 1));
         SET_VARSIZE(bgm, VARHDRSZ);
  
         if (slen + LPADDING + RPADDING < 2 || slen == 0)
@@ -277,7 +292,7 @@ generate_bigm(char *str, int slen)
                  * count bigrams
                  */
                 bptr = make_bigrams(bptr, buf, bytelen + LPADDING + RPADDING,
-                                                        charlen + LPADDING + RPADDING);
+                                                       charlen + LPADDING + RPADDING);
         }
  
         pfree(buf);
@@ -290,7 +305,7 @@ generate_bigm(char *str, int slen)
          */
         if (len > 1)
         {
-               bool    haveDups = false;
+               bool            haveDups = false;
  
                 qsort_arg((void *) GETARR(bgm), len, sizeof(bigm), comp_bigm, (void *) &haveDups);
                 if (haveDups)
@@ -303,7 +318,7 @@ generate_bigm(char *str, int slen)
  }
  
  /*
- * Extract the next non-wildcard part of a search string, ie, a word bounded
+ * Extract the next non-wildcard part of a search string, i.e. a word bounded
   * by '_' or '%' meta-characters, non-word characters or string end.
   *
   * str: source string, of length lenstr bytes (need not be null-terminated)
@@ -324,8 +339,8 @@ get_wildcard_part(const char *str, int lenstr,
         const char *beginword = str;
         const char *endword;
         char       *s = buf;
-       bool        in_leading_wildcard_meta = false;
-       bool        in_trailing_wildcard_meta = false;
+       bool            in_leading_wildcard_meta = false;
+       bool            in_trailing_wildcard_meta = false;
         bool            in_escape = false;
         int                     clen;
  
@@ -402,8 +417,8 @@ get_wildcard_part(const char *str, int lenstr,
                         else
                         {
                                 /*
-                                * Back up endword to the escape character when stopping at
-                                * an escaped char, so that subsequent get_wildcard_part will
+                                * Back up endword to the escape character when stopping at an
+                                * escaped char, so that subsequent get_wildcard_part will
                                  * restart from the escape character.  We assume here that
                                  * escape chars are single-byte.
                                  */
@@ -477,7 +492,17 @@ generate_wildcard_bigm(const char *str, int slen, bool *removeDups)
  
         *removeDups = false;
  
-       bgm = (BIGM *) palloc(VARHDRSZ + sizeof(bigm) * (Size) (slen / 2 + 1) * 3);
+       /*
+        * Guard against possible overflow in the palloc requests below.
+        * We need to prevent integer overflow in the multiplications here.
+        */
+       if ((Size) slen > (MaxAllocSize - VARHDRSZ) / sizeof(bigm) - 1 ||
+               (Size) slen > MaxAllocSize - 4)
+               ereport(ERROR,
+                               (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+                                errmsg("out of memory")));
+
+       bgm = (BIGM *) palloc(VARHDRSZ + sizeof(bigm) * (slen + 1));
         SET_VARSIZE(bgm, VARHDRSZ);
  
         if (slen + LPADDING + RPADDING < 2 || slen == 0)
@@ -510,7 +535,7 @@ generate_wildcard_bigm(const char *str, int slen, bool *removeDups)
          */
         if (len > 1)
         {
-               bool    haveDups = false;
+               bool            haveDups = false;
  
                 qsort_arg((void *) GETARR(bgm), len, sizeof(bigm), comp_bigm, (void *) &haveDups);
                 if (haveDups)
@@ -541,6 +566,7 @@ show_bigm(PG_FUNCTION_ARGS)
         for (i = 0, ptr = GETARR(bgm); i < ARRNELEM(bgm); i++, ptr++)
         {
                 text       *item = cstring_to_text_with_len(ptr->str, ptr->bytelen);
+
                 d[i] = PointerGetDatum(item);
         }
  
@@ -584,7 +610,7 @@ cnt_sml_bigm(BIGM *bgm1, BIGM *bgm2)
  
         while (ptr1 - GETARR(bgm1) < len1 && ptr2 - GETARR(bgm2) < len2)
         {
-               int             res = CMPBIGM(ptr1, ptr2);
+               int                     res = CMPBIGM(ptr1, ptr2);
  
                 if (res < 0)
                         ptr1++;
@@ -682,34 +708,15 @@ likequery(PG_FUNCTION_ARGS)
         PG_RETURN_TEXT_P(result);
  }
  
-inline int
-bigmstrcmp(char *arg1, int len1, char *arg2, int len2)
-{
-       int                     i;
-       int                     len = Min(len1, len2);
-
-       for (i = 0; i < len; i++, arg1++, arg2++)
-       {
-               if (*arg1 == *arg2)
-                       continue;
-               if (*arg1 < *arg2)
-                       return -1;
-               else
-                       return 1;
-       }
-
-       return (len1 == len2) ? 0 : ((len1 < len2) ? -1 : 1);
-}
-
  Datum
  bigmtextcmp(PG_FUNCTION_ARGS)
  {
-       text    *arg1 = PG_GETARG_TEXT_PP(0);
-       text    *arg2 = PG_GETARG_TEXT_PP(1);
-       char    *a1p = VARDATA_ANY(arg1);
-       char    *a2p = VARDATA_ANY(arg2);
-       int             len1 = VARSIZE_ANY_EXHDR(arg1);
-       int             len2 = VARSIZE_ANY_EXHDR(arg2);
+       text       *arg1 = PG_GETARG_TEXT_PP(0);
+       text       *arg2 = PG_GETARG_TEXT_PP(1);
+       char       *a1p = VARDATA_ANY(arg1);
+       char       *a2p = VARDATA_ANY(arg2);
+       int                     len1 = VARSIZE_ANY_EXHDR(arg1);
+       int                     len2 = VARSIZE_ANY_EXHDR(arg2);
  
         PG_RETURN_INT32(bigmstrcmp(a1p, len1, a2p, len2));
  }