Adjust hash table sizing algorithm to avoid integer overflow in
ExecHashJoinGetBatch().  Fixes core dump on large hash joins, as in
example from Rae Stiening.

author     Tom Lane <tgl@sss.pgh.pa.us>    Sun, 29 Dec 2002 22:28:50 +0000 (22:28 +0000)
committer  Tom Lane <tgl@sss.pgh.pa.us>    Sun, 29 Dec 2002 22:28:50 +0000 (22:28 +0000)
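
Context on the failure: ExecChooseHashTableSize() estimates bucket and batch
counts as doubles and then converts them to int; on a large enough inner
relation the estimate exceeds INT_MAX, so the conversion (and a later
nbatch-times-bucket-offset product) overflows, which is what crashed the large
hash join in the report.  Below is a minimal standalone sketch of the
totalbuckets clamp the patch introduces, with illustrative constants standing
in for FUDGE_FAC and NTUP_PER_BUCKET (this is not the PostgreSQL source):

    #include <limits.h>
    #include <math.h>
    #include <stdio.h>

    int main(void)
    {
        /* Hypothetical huge inner relation: 20 billion tuples. */
        double  ntuples = 2e10;
        double  fudge = 2.0;            /* stands in for FUDGE_FAC */
        double  ntup_per_bucket = 10.0; /* stands in for NTUP_PER_BUCKET */

        /* Bucket estimate as a double; about 4e9 here, well past INT_MAX. */
        double  dtmp = ceil(ntuples * fudge / ntup_per_bucket);

        /*
         * The pre-patch code did "(int) dtmp" directly, which is undefined
         * for out-of-range values and yielded a garbage bucket count.  The
         * patch clamps to INT_MAX first, as sketched here.
         */
        int     totalbuckets = (dtmp < INT_MAX) ? (int) dtmp : INT_MAX;

        if (totalbuckets <= 0)
            totalbuckets = 1;

        printf("dtmp=%.0f totalbuckets=%d\n", dtmp, totalbuckets);
        return 0;
    }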

src/backend/executor/nodeHash.c

index efdd3b3..9e0e4f0 100644
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/executor/nodeHash.c,v 1.71 2002/12/15 16:17:46 tgl Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/executor/nodeHash.c,v 1.72 2002/12/29 22:28:50 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -20,6 +20,7 @@
  */
 #include "postgres.h"
 
+#include <limits.h>
 #include <math.h>
 
 #include "access/hash.h"
@@ -344,7 +345,8 @@ ExecChooseHashTableSize(double ntuples, int tupwidth,
 {
        int                     tupsize;
        double          inner_rel_bytes;
-       double          hash_table_bytes;
+       long            hash_table_bytes;
+       double          dtmp;
        int                     nbatch;
        int                     nbuckets;
        int                     totalbuckets;
@@ -362,20 +364,22 @@ ExecChooseHashTableSize(double ntuples, int tupwidth,
        inner_rel_bytes = ntuples * tupsize * FUDGE_FAC;
 
        /*
-        * Target hashtable size is SortMem kilobytes, but not less than
-        * sqrt(estimated inner rel size), so as to avoid horrible
-        * performance.
+        * Target in-memory hashtable size is SortMem kilobytes.
         */
-       hash_table_bytes = sqrt(inner_rel_bytes);
-       if (hash_table_bytes < (SortMem * 1024L))
-               hash_table_bytes = SortMem * 1024L;
+       hash_table_bytes = SortMem * 1024L;
 
        /*
         * Count the number of hash buckets we want for the whole relation,
         * for an average bucket load of NTUP_PER_BUCKET (per virtual
-        * bucket!).
+        * bucket!).  It has to fit in an int, however.
         */
-       totalbuckets = (int) ceil(ntuples * FUDGE_FAC / NTUP_PER_BUCKET);
+       dtmp = ceil(ntuples * FUDGE_FAC / NTUP_PER_BUCKET);
+       if (dtmp < INT_MAX)
+               totalbuckets = (int) dtmp;
+       else
+               totalbuckets = INT_MAX;
+       if (totalbuckets <= 0)
+               totalbuckets = 1;
 
        /*
         * Count the number of buckets we think will actually fit in the
@@ -409,10 +413,16 @@ ExecChooseHashTableSize(double ntuples, int tupwidth,
                 * that nbatch doesn't have to have anything to do with the ratio
                 * totalbuckets/nbuckets; in fact, it is the number of groups we
                 * will use for the part of the data that doesn't fall into the
-                * first nbuckets hash buckets.
+                * first nbuckets hash buckets.  We try to set it to make all the
+                * batches the same size.  But we have to keep nbatch small
+                * enough to avoid integer overflow in ExecHashJoinGetBatch().
                 */
-               nbatch = (int) ceil((inner_rel_bytes - hash_table_bytes) /
-                                                       hash_table_bytes);
+               dtmp = ceil((inner_rel_bytes - hash_table_bytes) /
+                                       hash_table_bytes);
+               if (dtmp < INT_MAX / totalbuckets)
+                       nbatch = (int) dtmp;
+               else
+                       nbatch = INT_MAX / totalbuckets;
                if (nbatch <= 0)
                        nbatch = 1;
        }
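
The nbatch clamp exists because the batch lookup in ExecHashJoinGetBatch()
(in the hash join executor) multiplies nbatch by a bucket offset that can be
nearly as large as totalbuckets, so the product has to fit in an int.  The
following is a hedged, standalone sketch of that bound using made-up numbers;
the actual lookup expression may differ from this simplification:

    #include <limits.h>
    #include <stdio.h>

    int main(void)
    {
        int     totalbuckets = 100000;  /* hypothetical bucket count */
        long    wanted_nbatch = 50000;  /* hypothetical, from the double estimate */

        /*
         * Clamp as the patch does: nbatch times any bucket offset up to
         * totalbuckets must stay within int range.
         */
        int     nbatch = (wanted_nbatch < INT_MAX / totalbuckets)
                            ? (int) wanted_nbatch
                            : INT_MAX / totalbuckets;

        /* Worst-case product now fits in an int. */
        long    worst = (long) nbatch * totalbuckets;

        printf("nbatch=%d worst product=%ld (INT_MAX=%d)\n",
               nbatch, worst, INT_MAX);
        return 0;
    }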