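Fix up hash functions for datetime datatypes so that they don't take unwarranted liberties with int8 vs float8 values for these types; apply either hashint8 or hashfloat8 depending on HAVE_INT64_TIMESTAMP.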

author Tom Lane <tgl@sss.pgh.pa.us>

Fri, 6 Jul 2007 04:16:00 +0000 (04:16 +0000)

committer Tom Lane <tgl@sss.pgh.pa.us>

Fri, 6 Jul 2007 04:16:00 +0000 (04:16 +0000)
author Tom Lane <tgl@sss.pgh.pa.us>
Fri, 6 Jul 2007 04:16:00 +0000 (04:16 +0000)
committer Tom Lane <tgl@sss.pgh.pa.us>
Fri, 6 Jul 2007 04:16:00 +0000 (04:16 +0000)
diff --git a/src/backend/utils/adt/date.c b/src/backend/utils/adt/date.c

index a66680f..efc965a 100644 (file)
--- a/src/backend/utils/adt/date.c
+++ b/src/backend/utils/adt/date.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/adt/date.c,v 1.133 2007/06/15 20:56:50 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/adt/date.c,v 1.134 2007/07/06 04:15:58 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -1198,6 +1198,17 @@ time_cmp(PG_FUNCTION_ARGS)
  }
  
  Datum
+time_hash(PG_FUNCTION_ARGS)
+{
+       /* We can use either hashint8 or hashfloat8 directly */
+#ifdef HAVE_INT64_TIMESTAMP
+       return hashint8(fcinfo);
+#else
+       return hashfloat8(fcinfo);
+#endif
+}
+
+Datum
  time_larger(PG_FUNCTION_ARGS)
  {
         TimeADT         time1 = PG_GETARG_TIMEADT(0);
@@ -1960,20 +1971,27 @@ timetz_cmp(PG_FUNCTION_ARGS)
         PG_RETURN_INT32(timetz_cmp_internal(time1, time2));
  }
  
-/*
- * timetz, being an unusual size, needs a specialized hash function.
- */
  Datum
  timetz_hash(PG_FUNCTION_ARGS)
  {
         TimeTzADT  *key = PG_GETARG_TIMETZADT_P(0);
+       uint32          thash;
  
         /*
-        * Specify hash length as sizeof(double) + sizeof(int4), not as
-        * sizeof(TimeTzADT), so that any garbage pad bytes in the structure won't
-        * be included in the hash!
+        * To avoid any problems with padding bytes in the struct,
+        * we figure the field hashes separately and XOR them.  This also
+        * provides a convenient framework for dealing with the fact that
+        * the time field might be either double or int64.
          */
-       return hash_any((unsigned char *) key, sizeof(key->time) + sizeof(key->zone));
+#ifdef HAVE_INT64_TIMESTAMP
+       thash = DatumGetUInt32(DirectFunctionCall1(hashint8,
+                                                                                          Int64GetDatumFast(key->time)));
+#else
+       thash = DatumGetUInt32(DirectFunctionCall1(hashfloat8,
+                                                                                          Float8GetDatumFast(key->time)));
+#endif
+       thash ^= DatumGetUInt32(hash_uint32(key->zone));
+       PG_RETURN_UINT32(thash);
  }
  
  Datum
diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c

index d201935..04eaa81 100644 (file)
--- a/src/backend/utils/adt/timestamp.c
+++ b/src/backend/utils/adt/timestamp.c
@@ -8,7 +8,7 @@
   *
   *
   * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/adt/timestamp.c,v 1.178 2007/06/15 20:56:50 tgl Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/adt/timestamp.c,v 1.179 2007/07/06 04:15:59 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -1839,6 +1839,17 @@ timestamp_cmp(PG_FUNCTION_ARGS)
         PG_RETURN_INT32(timestamp_cmp_internal(dt1, dt2));
  }
  
+Datum
+timestamp_hash(PG_FUNCTION_ARGS)
+{
+       /* We can use either hashint8 or hashfloat8 directly */
+#ifdef HAVE_INT64_TIMESTAMP
+       return hashint8(fcinfo);
+#else
+       return hashfloat8(fcinfo);
+#endif
+}
+
  
  /*
   * Crosstype comparison functions for timestamp vs timestamptz
@@ -2110,21 +2121,32 @@ interval_cmp(PG_FUNCTION_ARGS)
         PG_RETURN_INT32(interval_cmp_internal(interval1, interval2));
  }
  
-/*
- * interval, being an unusual size, needs a specialized hash function.
- */
  Datum
  interval_hash(PG_FUNCTION_ARGS)
  {
         Interval   *key = PG_GETARG_INTERVAL_P(0);
+       uint32          thash;
+       uint32          mhash;
  
         /*
-        * Specify hash length as sizeof(double) + sizeof(int4), not as
-        * sizeof(Interval), so that any garbage pad bytes in the structure won't
-        * be included in the hash!
+        * To avoid any problems with padding bytes in the struct,
+        * we figure the field hashes separately and XOR them.  This also
+        * provides a convenient framework for dealing with the fact that
+        * the time field might be either double or int64.
          */
-       return hash_any((unsigned char *) key,
-                                 sizeof(key->time) + sizeof(key->day) + sizeof(key->month));
+#ifdef HAVE_INT64_TIMESTAMP
+       thash = DatumGetUInt32(DirectFunctionCall1(hashint8,
+                                                                                          Int64GetDatumFast(key->time)));
+#else
+       thash = DatumGetUInt32(DirectFunctionCall1(hashfloat8,
+                                                                                          Float8GetDatumFast(key->time)));
+#endif
+       thash ^= DatumGetUInt32(hash_uint32(key->day));
+       /* Shift so "k days" and "k months" don't hash to the same thing */
+       mhash = DatumGetUInt32(hash_uint32(key->month));
+       thash ^= mhash << 24;
+       thash ^= mhash >> 8;
+       PG_RETURN_UINT32(thash);
  }
  
  /* overlaps_timestamp() --- implements the SQL92 OVERLAPS operator.
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h

index 2975721..e85988b 100644 (file)
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -37,7 +37,7 @@
   * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.413 2007/06/28 00:02:39 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.414 2007/07/06 04:15:59 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -53,6 +53,6 @@
   */
  
  /*                                                     yyyymmddN */
-#define CATALOG_VERSION_NO     200706271
+#define CATALOG_VERSION_NO     200707051
  
  #endif
diff --git a/src/include/catalog/pg_amproc.h b/src/include/catalog/pg_amproc.h

index 2645e89..c31a09f 100644 (file)
--- a/src/include/catalog/pg_amproc.h
+++ b/src/include/catalog/pg_amproc.h
@@ -22,7 +22,7 @@
   * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $PostgreSQL: pgsql/src/include/catalog/pg_amproc.h,v 1.65 2007/05/08 18:56:47 neilc Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/pg_amproc.h,v 1.66 2007/07/06 04:15:59 tgl Exp $
   *
   * NOTES
   *       the genbki.sh script reads this file and generates .bki
@@ -147,11 +147,11 @@ DATA(insert (     1987   19 19 1 455 ));
  DATA(insert (  1990   26 26 1 453 ));
  DATA(insert (  1992   30 30 1 457 ));
  DATA(insert (  1995   25 25 1 400 ));
-DATA(insert (  1997   1083 1083 1 452 ));
+DATA(insert (  1997   1083 1083 1 1688 ));
  DATA(insert (  1998   1700 1700 1 432 ));
-DATA(insert (  1999   1184 1184 1 452 ));
+DATA(insert (  1999   1184 1184 1 2039 ));
  DATA(insert (  2001   1266 1266 1 1696 ));
-DATA(insert (  2040   1114 1114 1 452 ));
+DATA(insert (  2040   1114 1114 1 2039 ));
  DATA(insert (  2222   16 16 1 454 ));
  DATA(insert (  2223   17 17 1 456 ));
  DATA(insert (  2224   22 22 1 398 ));
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h

index ecd1f11..024be90 100644 (file)
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.461 2007/06/28 00:02:39 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.462 2007/07/06 04:15:59 tgl Exp $
   *
   * NOTES
   *       The script catalog/genbki.sh reads this file and generates .bki
@@ -2474,6 +2474,8 @@ DESCR("greater-than-or-equal");
  DATA(insert OID = 1693 (  btboolcmp                    PGNSP PGUID 12 1 0 f f t f i 2 23 "16 16" _null_ _null_ _null_  btboolcmp - _null_ ));
  DESCR("btree less-equal-greater");
  
+DATA(insert OID = 1688 (  time_hash                    PGNSP PGUID 12 1 0 f f t f i 1 23 "1083" _null_ _null_ _null_ time_hash - _null_ ));
+DESCR("hash");
  DATA(insert OID = 1696 (  timetz_hash          PGNSP PGUID 12 1 0 f f t f i 1 23 "1266" _null_ _null_ _null_ timetz_hash - _null_ ));
  DESCR("hash");
  DATA(insert OID = 1697 (  interval_hash                PGNSP PGUID 12 1 0 f f t f i 1 23 "1186" _null_ _null_ _null_ interval_hash - _null_ ));
@@ -3043,6 +3045,8 @@ DATA(insert OID = 2037 (  timezone                        PGNSP PGUID 12 1 0 f f t f v 2 1266 "25 126
  DESCR("adjust time with time zone to new zone");
  DATA(insert OID = 2038 (  timezone                     PGNSP PGUID 12 1 0 f f t f i 2 1266 "1186 1266" _null_ _null_ _null_    timetz_izone - _null_ ));
  DESCR("adjust time with time zone to new zone");
+DATA(insert OID = 2039 (  timestamp_hash       PGNSP PGUID 12 1 0 f f t f i 1  23 "1114" _null_ _null_ _null_ timestamp_hash - _null_ ));
+DESCR("hash");
  DATA(insert OID = 2041 ( overlaps                      PGNSP PGUID 12 1 0 f f f f i 4 16 "1114 1114 1114 1114" _null_ _null_ _null_    overlaps_timestamp - _null_ ));
  DESCR("SQL92 interval comparison");
  DATA(insert OID = 2042 ( overlaps                      PGNSP PGUID 14 1 0 f f f f i 4 16 "1114 1186 1114 1186" _null_ _null_ _null_    "select ($1, ($1 + $2)) overlaps ($3, ($3 + $4))" - _null_ ));
diff --git a/src/include/utils/date.h b/src/include/utils/date.h

index c7f9f73..fd91f79 100644 (file)
--- a/src/include/utils/date.h
+++ b/src/include/utils/date.h
@@ -7,7 +7,7 @@
   * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $PostgreSQL: pgsql/src/include/utils/date.h,v 1.37 2007/06/05 21:31:08 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/utils/date.h,v 1.38 2007/07/06 04:16:00 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -148,6 +148,7 @@ extern Datum time_le(PG_FUNCTION_ARGS);
  extern Datum time_gt(PG_FUNCTION_ARGS);
  extern Datum time_ge(PG_FUNCTION_ARGS);
  extern Datum time_cmp(PG_FUNCTION_ARGS);
+extern Datum time_hash(PG_FUNCTION_ARGS);
  extern Datum overlaps_time(PG_FUNCTION_ARGS);
  extern Datum time_larger(PG_FUNCTION_ARGS);
  extern Datum time_smaller(PG_FUNCTION_ARGS);
diff --git a/src/include/utils/timestamp.h b/src/include/utils/timestamp.h

index 629aa36..5ba8ccd 100644 (file)
--- a/src/include/utils/timestamp.h
+++ b/src/include/utils/timestamp.h
@@ -6,7 +6,7 @@
   * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
   * Portions Copyright (c) 1994, Regents of the University of California
   *
- * $PostgreSQL: pgsql/src/include/utils/timestamp.h,v 1.70 2007/06/05 21:31:08 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/utils/timestamp.h,v 1.71 2007/07/06 04:16:00 tgl Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -211,6 +211,7 @@ extern Datum timestamp_ge(PG_FUNCTION_ARGS);
  extern Datum timestamp_gt(PG_FUNCTION_ARGS);
  extern Datum timestamp_finite(PG_FUNCTION_ARGS);
  extern Datum timestamp_cmp(PG_FUNCTION_ARGS);
+extern Datum timestamp_hash(PG_FUNCTION_ARGS);
  extern Datum timestamp_smaller(PG_FUNCTION_ARGS);
  extern Datum timestamp_larger(PG_FUNCTION_ARGS);
  
diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out

index 8b5f644..247c8c9 100644 (file)
--- a/src/test/regress/expected/opr_sanity.out
+++ b/src/test/regress/expected/opr_sanity.out
@@ -980,10 +980,9 @@ WHERE p3.opfmethod = (SELECT oid FROM pg_am WHERE amname = 'btree')
  -- For hash we can also do a little better: the support routines must be
  -- of the form hash(lefttype) returns int4.  There are several cases where
  -- we cheat and use a hash function that is physically compatible with the
--- datatype even though there's no cast, so for now we can't check that.
-SELECT p1.amprocfamily, p1.amprocnum,
-       p2.oid, p2.proname,
-       p3.opfname
+-- datatype even though there's no cast, so this check does find a small
+-- number of entries.
+SELECT p1.amprocfamily, p1.amprocnum, p2.proname, p3.opfname
  FROM pg_amproc AS p1, pg_proc AS p2, pg_opfamily AS p3
  WHERE p3.opfmethod = (SELECT oid FROM pg_am WHERE amname = 'hash')
      AND p1.amprocfamily = p3.oid AND p1.amproc = p2.oid AND
@@ -991,11 +990,20 @@ WHERE p3.opfmethod = (SELECT oid FROM pg_am WHERE amname = 'hash')
       OR proretset
       OR prorettype != 'int4'::regtype
       OR pronargs != 1
---   OR NOT physically_coercible(amproclefttype, proargtypes[0])
-     OR amproclefttype != amprocrighttype);
- amprocfamily | amprocnum | oid | proname | opfname 
---------------+-----------+-----+---------+---------
-(0 rows)
+     OR NOT physically_coercible(amproclefttype, proargtypes[0])
+     OR amproclefttype != amprocrighttype)
+ORDER BY 1;
+ amprocfamily | amprocnum |    proname     |      opfname       
+--------------+-----------+----------------+--------------------
+          435 |         1 | hashint4       | date_ops
+         1999 |         1 | timestamp_hash | timestamptz_ops
+         2222 |         1 | hashchar       | bool_ops
+         2223 |         1 | hashvarlena    | bytea_ops
+         2225 |         1 | hashint4       | xid_ops
+         2226 |         1 | hashint4       | cid_ops
+         2229 |         1 | hashvarlena    | text_pattern_ops
+         2231 |         1 | hashvarlena    | bpchar_pattern_ops
+(8 rows)
  
  -- Support routines that are primary members of opfamilies must be immutable
  -- (else it suggests that the index ordering isn't fixed).  But cross-type
diff --git a/src/test/regress/sql/opr_sanity.sql b/src/test/regress/sql/opr_sanity.sql

index f93a71d..54c60c1 100644 (file)
--- a/src/test/regress/sql/opr_sanity.sql
+++ b/src/test/regress/sql/opr_sanity.sql
@@ -790,11 +790,10 @@ WHERE p3.opfmethod = (SELECT oid FROM pg_am WHERE amname = 'btree')
  -- For hash we can also do a little better: the support routines must be
  -- of the form hash(lefttype) returns int4.  There are several cases where
  -- we cheat and use a hash function that is physically compatible with the
--- datatype even though there's no cast, so for now we can't check that.
+-- datatype even though there's no cast, so this check does find a small
+-- number of entries.
  
-SELECT p1.amprocfamily, p1.amprocnum,
-       p2.oid, p2.proname,
-       p3.opfname
+SELECT p1.amprocfamily, p1.amprocnum, p2.proname, p3.opfname
  FROM pg_amproc AS p1, pg_proc AS p2, pg_opfamily AS p3
  WHERE p3.opfmethod = (SELECT oid FROM pg_am WHERE amname = 'hash')
      AND p1.amprocfamily = p3.oid AND p1.amproc = p2.oid AND
@@ -802,8 +801,9 @@ WHERE p3.opfmethod = (SELECT oid FROM pg_am WHERE amname = 'hash')
       OR proretset
       OR prorettype != 'int4'::regtype
       OR pronargs != 1
---   OR NOT physically_coercible(amproclefttype, proargtypes[0])
-     OR amproclefttype != amprocrighttype);
+     OR NOT physically_coercible(amproclefttype, proargtypes[0])
+     OR amproclefttype != amprocrighttype)
+ORDER BY 1;
  
  -- Support routines that are primary members of opfamilies must be immutable
  -- (else it suggests that the index ordering isn't fixed).  But cross-type
author	Tom Lane <tgl@sss.pgh.pa.us>
	Fri, 6 Jul 2007 04:16:00 +0000 (04:16 +0000)
committer	Tom Lane <tgl@sss.pgh.pa.us>
	Fri, 6 Jul 2007 04:16:00 +0000 (04:16 +0000)
src/backend/utils/adt/date.c		patch | blob | history
src/backend/utils/adt/timestamp.c		patch | blob | history
src/include/catalog/catversion.h		patch | blob | history
src/include/catalog/pg_amproc.h		patch | blob | history
src/include/catalog/pg_proc.h		patch | blob | history
src/include/utils/date.h		patch | blob | history
src/include/utils/timestamp.h		patch | blob | history
src/test/regress/expected/opr_sanity.out		patch | blob | history
src/test/regress/sql/opr_sanity.sql		patch | blob | history