OSDN Git Service

Clean up some really grotty coding in catcache.c, improve hashing
author: Tom Lane <tgl@sss.pgh.pa.us>
Mon, 21 Feb 2000 03:36:59 +0000 (03:36 +0000)
committer: Tom Lane <tgl@sss.pgh.pa.us>
Mon, 21 Feb 2000 03:36:59 +0000 (03:36 +0000)
performance in catcache lookups.

src/backend/access/hash/hashfunc.c
src/backend/utils/adt/int.c
src/backend/utils/cache/catcache.c
src/include/access/hash.h
src/include/catalog/pg_proc.h
src/include/catalog/pg_type.h
src/include/utils/builtins.h
src/include/utils/catcache.h
src/include/utils/int8.h

index 8709dd7..78af635 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/access/hash/hashfunc.c,v 1.23 2000/01/26 05:55:55 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/access/hash/hashfunc.c,v 1.24 2000/02/21 03:36:46 tgl Exp $
  *
  * NOTES
  *       These functions are stored in pg_amproc.      For each operator class
@@ -146,8 +146,24 @@ hashoidvector(Oid *key)
        int                     i;
        uint32          result = 0;
 
-       for (i = 0; i < INDEX_MAX_KEYS; i++)
-               result = result ^ (~(uint32) key[i]);
+       for (i = INDEX_MAX_KEYS; --i >= 0; )
+               result = (result << 1) ^ (~(uint32) key[i]);
+       return result;
+}
+
+/*
+ * hashint2vector - hash an array of INDEX_MAX_KEYS int16 values.
+ *
+ * The array is walked from its last element down to its first.  At each
+ * step the running result is shifted left by one bit and XORed with the
+ * bitwise complement of the element converted to uint32.  The final
+ * accumulated value is returned.
+ *
+ * Note: hashint2vector currently can't be used as a user hash table
+ * hash function, because it has no pg_proc entry.  We only need it
+ * for catcache indexing.
+ */
+uint32
+hashint2vector(int16 *key)
+{
+       int                     i;
+       uint32          result = 0;
+
+       for (i = INDEX_MAX_KEYS; --i >= 0; )
+               result = (result << 1) ^ (~(uint32) key[i]);
        return result;
 }
 
@@ -158,13 +174,10 @@ hashoidvector(Oid *key)
 uint32
 hashchar(char key)
 {
-       int                     len;
        uint32          h;
 
-       h = 0;
-       len = sizeof(char);
        /* Convert char to integer */
-       h = h * PRIME1 ^ (key - ' ');
+       h = (key - ' ');
        h %= PRIME2;
 
        return h;
index 6b69624..2add22c 100644 (file)
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/utils/adt/int.c,v 1.32 2000/01/26 05:57:14 momjian Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/utils/adt/int.c,v 1.33 2000/02/21 03:36:48 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -145,6 +145,17 @@ int2vectorout(int16 *int2Array)
 }
 
 /*
+ * We don't have a complete set of int2vector support routines,
+ * but we need int2vectoreq for catcache indexing.
+ *
+ * int2vectoreq - returns true when the INDEX_MAX_KEYS int16 elements
+ * starting at arg1 and at arg2 compare bytewise equal (via memcmp),
+ * and false otherwise.
+ */
+bool
+int2vectoreq(int16 *arg1, int16 *arg2)
+{
+       return (bool) (memcmp(arg1, arg2, INDEX_MAX_KEYS * sizeof(int16)) == 0);
+}
+
+
+/*
  *             int44in                 - converts "num num ..." to internal form
  *
  *             Note:
@@ -169,7 +180,7 @@ int44in(char *input_string)
 }
 
 /*
- *             int2vectorout           - converts internal form to "num num ..."
+ *             int44out                - converts internal form to "num num ..."
  */
 char *
 int44out(int32 *an_array)
@@ -489,13 +500,6 @@ int42ge(int32 arg1, int32 arg2)
        return arg1 >= arg2;
 }
 
-
-bool
-keyfirsteq(int16 *arg1, int16 arg2)
-{
-       return *arg1 == arg2;
-}
-
 /*
  *             int[24]pl               - returns arg1 + arg2
  *             int[24]mi               - returns arg1 - arg2
index c6c8763..297c92b 100644 (file)
@@ -8,12 +8,14 @@
  *
  *
  * IDENTIFICATION
- *       $Header: /cvsroot/pgsql/src/backend/utils/cache/catcache.c,v 1.61 2000/02/18 09:28:53 inoue Exp $
+ *       $Header: /cvsroot/pgsql/src/backend/utils/cache/catcache.c,v 1.62 2000/02/21 03:36:49 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 #include "postgres.h"
+
 #include "access/genam.h"
+#include "access/hash.h"
 #include "access/heapam.h"
 #include "access/valid.h"
 #include "catalog/pg_operator.h"
 static void CatCacheRemoveCTup(CatCache *cache, Dlelem *e);
 static Index CatalogCacheComputeHashIndex(struct catcache * cacheInP);
 static Index CatalogCacheComputeTupleHashIndex(struct catcache * cacheInOutP,
-                                                                 Relation relation, HeapTuple tuple);
+                                                                                          Relation relation,
+                                                                                          HeapTuple tuple);
 static void CatalogCacheInitializeCache(struct catcache * cache,
-                                                       Relation relation);
-static long comphash(long l, char *v);
+                                                                               Relation relation);
+static uint32 cc_hashname(NameData *n);
 
 /* ----------------
  *             variables, macros and other stuff
@@ -63,6 +66,7 @@ GlobalMemory CacheCxt;                        /* context in which caches are allocated */
 /* ----------------
  *             EQPROC is used in CatalogCacheInitializeCache to find the equality
  *             functions for system types that are used as cache key fields.
+ *             See also GetCCHashFunc, which should support the same set of types.
  *
  *             XXX this should be replaced by catalog lookups,
  *             but that seems to pose considerable risk of circularity...
@@ -70,7 +74,7 @@ GlobalMemory CacheCxt;                        /* context in which caches are allocated */
  */
 static const Oid eqproc[] = {
        F_BOOLEQ, InvalidOid, F_CHAREQ, F_NAMEEQ, InvalidOid,
-       F_INT2EQ, F_KEYFIRSTEQ, F_INT4EQ, F_OIDEQ, F_TEXTEQ,
+       F_INT2EQ, F_INT2VECTOREQ, F_INT4EQ, F_OIDEQ, F_TEXTEQ,
        F_OIDEQ, InvalidOid, InvalidOid, InvalidOid, F_OIDVECTOREQ
 };
 
@@ -80,6 +84,54 @@ static const Oid eqproc[] = {
  *                                     internal support functions
  * ----------------------------------------------------------------
  */
+/*
+ * GetCCHashFunc
+ *             Map the type OID of a cache key column to the hash function
+ *             that is returned (cast to CCHashFunc) for that type.
+ *
+ * bool and char keys share hashchar; regproc and oid keys share hashoid.
+ * A type that is not listed in the switch is reported with elog(FATAL).
+ */
+static CCHashFunc
+GetCCHashFunc(Oid keytype)
+{
+       switch (keytype)
+       {
+               case BOOLOID:
+               case CHAROID:
+                       return (CCHashFunc) hashchar;
+               case NAMEOID:
+                       return (CCHashFunc) cc_hashname;
+               case INT2OID:
+                       return (CCHashFunc) hashint2;
+               case INT2VECTOROID:
+                       return (CCHashFunc) hashint2vector;
+               case INT4OID:
+                       return (CCHashFunc) hashint4;
+               case TEXTOID:
+                       return (CCHashFunc) hashtext;
+               case REGPROCOID:
+               case OIDOID:
+                       return (CCHashFunc) hashoid;
+               case OIDVECTOROID:
+                       return (CCHashFunc) hashoidvector;
+               default:
+                       elog(FATAL, "GetCCHashFunc: type %u unsupported as catcache key",
+                                keytype);
+                       return NULL;
+       }
+}
+
+static uint32
+cc_hashname(NameData *n)
+{
+       /*
+        * We need our own variant of hashname because we want to accept
+        * null-terminated C strings as search values for name fields.
+        * So, we have to make sure the data is correctly padded before
+        * we compute the hash value.
+        *
+        * To do that, the input is copied into a local NameData with
+        * namestrcpy, and hashname is applied to that local copy rather
+        * than to the caller's data.
+        */
+       NameData        my_n;
+
+       namestrcpy(&my_n, NameStr(*n));
+
+       return hashname(&my_n);
+}
+
+
 /* --------------------------------
  *             CatalogCacheInitializeCache
  * --------------------------------
@@ -190,31 +242,20 @@ CatalogCacheInitializeCache(struct catcache * cache,
 
                if (cache->cc_key[i] > 0)
                {
+                       Oid             keytype = tupdesc->attrs[cache->cc_key[i] - 1]->atttypid;
 
-                       /*
-                        * Yoiks.  The implementation of the hashing code and the
-                        * implementation of int2vector's are at loggerheads.  The right
-                        * thing to do is to throw out the implementation of int2vector's
-                        * altogether; until that happens, we do the right thing here
-                        * to guarantee that the hash key generator doesn't try to
-                        * dereference an int2 by mistake.
-                        */
+                       cache->cc_hashfunc[i] = GetCCHashFunc(keytype);
 
-                       if (tupdesc->attrs[cache->cc_key[i] - 1]->atttypid == INT2VECTOROID)
-                               cache->cc_klen[i] = sizeof(short);
-                       else
-                               cache->cc_klen[i] = tupdesc->attrs[cache->cc_key[i] - 1]->attlen;
-
-                       cache->cc_skey[i].sk_procedure = EQPROC(tupdesc->attrs[cache->cc_key[i] - 1]->atttypid);
+                       /* If GetCCHashFunc liked the type, safe to index into eqproc[] */
+                       cache->cc_skey[i].sk_procedure = EQPROC(keytype);
 
                        fmgr_info(cache->cc_skey[i].sk_procedure,
                                          &cache->cc_skey[i].sk_func);
                        cache->cc_skey[i].sk_nargs = cache->cc_skey[i].sk_func.fn_nargs;
 
-                       CACHE5_elog(DEBUG, "CatalogCacheInit %s %d %d %x",
+                       CACHE4_elog(DEBUG, "CatalogCacheInit %s %d %x",
                                                RelationGetRelationName(relation),
                                                i,
-                                               tupdesc->attrs[cache->cc_key[i] - 1]->attlen,
                                                cache);
                }
        }
@@ -255,53 +296,6 @@ CatalogCacheInitializeCache(struct catcache * cache,
        MemoryContextSwitchTo(oldcxt);
 }
 
-/* ----------------
- * comphash
- *             Compute a hash value, somehow.
- *
- * XXX explain algorithm here.
- *
- * l is length of the attribute value, v
- * v is the attribute value ("Datum")
- * ----------------
- */
-static long
-comphash(long l, char *v)
-{
-       long            i;
-       NameData        n;
-
-       CACHE3_elog(DEBUG, "comphash (%d,%x)", l, v);
-
-       switch (l)
-       {
-               case 1:
-               case 2:
-               case 4:
-                       return (long) v;
-       }
-
-       if (l == NAMEDATALEN)
-       {
-
-               /*
-                * if it's a name, make sure that the values are null-padded.
-                *
-                * Note that this other fixed-length types can also have the same
-                * typelen so this may break them         - XXX
-                */
-               namestrcpy(&n, v);
-               v = NameStr(n);
-       }
-       else if (l < 0)
-               l = VARSIZE(v);
-
-       i = 0;
-       while (l--)
-               i += *v++;
-       return i;
-}
-
 /* --------------------------------
  *             CatalogCacheComputeHashIndex
  * --------------------------------
@@ -309,40 +303,37 @@ comphash(long l, char *v)
 static Index
 CatalogCacheComputeHashIndex(struct catcache * cacheInP)
 {
-       Index           hashIndex;
+       uint32          hashIndex = 0;
 
-       hashIndex = 0x0;
-       CACHE6_elog(DEBUG, "CatalogCacheComputeHashIndex %s %d %d %d %x",
+       CACHE4_elog(DEBUG, "CatalogCacheComputeHashIndex %s %d %x",
                                cacheInP->cc_relname,
                                cacheInP->cc_nkeys,
-                               cacheInP->cc_klen[0],
-                               cacheInP->cc_klen[1],
                                cacheInP);
 
        switch (cacheInP->cc_nkeys)
        {
                case 4:
-                       hashIndex ^= comphash(cacheInP->cc_klen[3],
-                                                (char *) cacheInP->cc_skey[3].sk_argument) << 9;
+                       hashIndex ^=
+                               (*cacheInP->cc_hashfunc[3])(cacheInP->cc_skey[3].sk_argument) << 9;
                        /* FALLTHROUGH */
                case 3:
-                       hashIndex ^= comphash(cacheInP->cc_klen[2],
-                                                (char *) cacheInP->cc_skey[2].sk_argument) << 6;
+                       hashIndex ^=
+                               (*cacheInP->cc_hashfunc[2])(cacheInP->cc_skey[2].sk_argument) << 6;
                        /* FALLTHROUGH */
                case 2:
-                       hashIndex ^= comphash(cacheInP->cc_klen[1],
-                                                (char *) cacheInP->cc_skey[1].sk_argument) << 3;
+                       hashIndex ^=
+                               (*cacheInP->cc_hashfunc[1])(cacheInP->cc_skey[1].sk_argument) << 3;
                        /* FALLTHROUGH */
                case 1:
-                       hashIndex ^= comphash(cacheInP->cc_klen[0],
-                                                         (char *) cacheInP->cc_skey[0].sk_argument);
+                       hashIndex ^=
+                               (*cacheInP->cc_hashfunc[0])(cacheInP->cc_skey[0].sk_argument);
                        break;
                default:
                        elog(FATAL, "CCComputeHashIndex: %d cc_nkeys", cacheInP->cc_nkeys);
                        break;
        }
-       hashIndex %= cacheInP->cc_size;
-       return hashIndex;
+       hashIndex %= (uint32) cacheInP->cc_size;
+       return (Index) hashIndex;
 }
 
 /* --------------------------------
@@ -645,8 +636,8 @@ do { \
                cp->relationId, cp->id, cp->cc_nkeys, cp->cc_size); \
        for (i = 0; i < nkeys; i += 1) \
        { \
-               elog(DEBUG, "InitSysCache: key=%d len=%d skey=[%d %d %d %d]\n", \
-                        cp->cc_key[i], cp->cc_klen[i], \
+               elog(DEBUG, "InitSysCache: key=%d skey=[%d %d %d %d]\n", \
+                        cp->cc_key[i], \
                         cp->cc_skey[i].sk_flags, \
                         cp->cc_skey[i].sk_attno, \
                         cp->cc_skey[i].sk_procedure, \
@@ -742,7 +733,8 @@ InitSysCache(char *relname,
        cp->cc_iscanfunc = iScanfuncP;
 
        /* ----------------
-        *      initialize the cache's key information
+        *      partially initialize the cache's key information
+        *      CatalogCacheInitializeCache() will do the rest
         * ----------------
         */
        for (i = 0; i < nkeys; ++i)
@@ -756,15 +748,7 @@ InitSysCache(char *relname,
                                elog(FATAL, "InitSysCache: called with %d key[%d]", key[i], i);
                        else
                        {
-                               cp->cc_klen[i] = sizeof(Oid);
-
-                               /*
-                                * ScanKeyEntryData and struct skey are equivalent. It
-                                * looks like a move was made to obsolete struct skey, but
-                                * it didn't reach this file.  Someday we should clean up
-                                * this code and consolidate to ScanKeyEntry - mer 10 Nov
-                                * 1991
-                                */
+                               cp->cc_hashfunc[i] = GetCCHashFunc(OIDOID);
                                ScanKeyEntryInitialize(&cp->cc_skey[i],
                                                                           (bits16) 0,
                                                                           (AttrNumber) key[i],
index c4aa369..7b9445e 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: hash.h,v 1.30 2000/01/26 05:57:50 momjian Exp $
+ * $Id: hash.h,v 1.31 2000/02/21 03:36:51 tgl Exp $
  *
  * NOTES
  *             modeled after Margo Seltzer's hash implementation for unix.
@@ -270,6 +270,7 @@ extern uint32 hashfloat4(float32 keyp);
 extern uint32 hashfloat8(float64 keyp);
 extern uint32 hashoid(Oid key);
 extern uint32 hashoidvector(Oid *key);
+extern uint32 hashint2vector(int16 *key);
 extern uint32 hashchar(char key);
 extern uint32 hashtext(struct varlena * key);
 extern uint32 hashname(NameData *n);
index 4fdbaaa..56423e0 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: pg_proc.h,v 1.123 2000/02/17 03:39:48 tgl Exp $
+ * $Id: pg_proc.h,v 1.124 2000/02/21 03:36:55 tgl Exp $
  *
  * NOTES
  *       The script catalog/genbki.sh reads this file and generates .bki
@@ -661,8 +661,8 @@ DATA(insert OID = 313 (  i2toi4                        PGUID 11 f t t 1 f  23  "21" 100 0 0 100  i
 DESCR("convert int2 to int4");
 DATA(insert OID = 314 (  i4toi2                           PGUID 11 f t t 1 f  21  "23" 100 0 0 100  i4toi2 - ));
 DESCR("convert int4 to int2");
-DATA(insert OID = 315 (  keyfirsteq               PGUID 11 f t f 2 f  16       "0 21" 100 0 0 100      keyfirsteq - ));
-DESCR("");
+DATA(insert OID = 315 (  int2vectoreq     PGUID 11 f t t 2 f  16  "22 22" 100 0 0 100  int2vectoreq - ));
+DESCR("equal");
 DATA(insert OID = 316 (  i4tod                    PGUID 11 f t t 1 f 701  "23" 100 0 0 100  i4tod - ));
 DESCR("convert int4 to float8");
 DATA(insert OID = 317 (  dtoi4                    PGUID 11 f t t 1 f  23 "701" 100 0 0 100  dtoi4 - ));
index 83dd6e6..0a94265 100644 (file)
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: pg_type.h,v 1.82 2000/02/20 06:28:41 tgl Exp $
+ * $Id: pg_type.h,v 1.83 2000/02/21 03:36:57 tgl Exp $
  *
  * NOTES
  *       the genbki.sh script reads this file and generates .bki
@@ -177,13 +177,6 @@ DESCR("-32 thousand to 32 thousand, 2-byte storage");
 
 DATA(insert OID = 22 ( int2vector PGUID INDEX_MAX_KEYS*2 -1 f b t \054 0  21 int2vectorin int2vectorout int2vectorin int2vectorout i _null_ ));
 DESCR("array of INDEX_MAX_KEYS int2 integers, used in system tables");
-/*
- * XXX -- the implementation of int2vector's in postgres is a hack, and will
- *               go away someday.      until that happens, there is a case (in the
- *               catalog cache management code) where we need to step gingerly
- *               over piles of int2vector's on the sidewalk.  in order to do so, we
- *               need the OID of the int2vector row from pg_type.
- */
 #define INT2VECTOROID  22
 
 DATA(insert OID = 23 ( int4       PGUID  4  10 t b t \054 0   0 int4in int4out int4in int4out i _null_ ));
@@ -216,6 +209,8 @@ DESCR("command identifier type, sequence in transaction id");
 
 DATA(insert OID = 30 ( oidvector  PGUID INDEX_MAX_KEYS*4 -1 f b t \054 0  26 oidvectorin oidvectorout oidvectorin oidvectorout i _null_ ));
 DESCR("array of INDEX_MAX_KEYS oids, used in system tables");
+#define OIDVECTOROID   30
+
 DATA(insert OID = 32 ( SET                PGUID -1  -1 f b t \054 0   0 textin textout textin textout i _null_ ));
 DESCR("set of tuples");
 
index df8dad8..b6e9cee 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: builtins.h,v 1.102 2000/02/16 17:26:25 thomas Exp $
+ * $Id: builtins.h,v 1.103 2000/02/21 03:36:59 tgl Exp $
  *
  * NOTES
  *       This should normally only be included by fmgr.h.
@@ -75,6 +75,7 @@ extern int32 int2in(char *num);
 extern char *int2out(int16 sh);
 extern int16 *int2vectorin(char *shs);
 extern char *int2vectorout(int16 *shs);
+extern bool int2vectoreq(int16 *arg1, int16 *arg2);
 extern int32 *int44in(char *input_string);
 extern char *int44out(int32 *an_array);
 extern int32 int4in(char *num);
@@ -109,7 +110,6 @@ extern bool int42lt(int32 arg1, int32 arg2);
 extern bool int42le(int32 arg1, int32 arg2);
 extern bool int42gt(int32 arg1, int32 arg2);
 extern bool int42ge(int32 arg1, int32 arg2);
-extern bool keyfirsteq(int16 *arg1, int16 arg2);
 extern int32 int4um(int32 arg);
 extern int32 int4pl(int32 arg1, int32 arg2);
 extern int32 int4mi(int32 arg1, int32 arg2);
index 58ef8d6..7b4b679 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: catcache.h,v 1.19 2000/01/26 05:58:37 momjian Exp $
+ * $Id: catcache.h,v 1.20 2000/02/21 03:36:59 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -38,6 +38,8 @@ typedef struct catctup
 #define NCCBUCK 500                            /* CatCache buckets */
 #define MAXTUP 300                             /* Maximum # of tuples cached per cache */
 
+typedef uint32 (* CCHashFunc) (Datum);
+
 typedef struct catcache
 {
        Oid                     relationId;
@@ -52,8 +54,8 @@ typedef struct catcache
        short           cc_maxtup;              /* max # of tuples allowed (LRU) */
        short           cc_nkeys;
        short           cc_size;
-       short           cc_key[4];
-       short           cc_klen[4];
+       short           cc_key[4];              /* AttrNumber of each key */
+       CCHashFunc      cc_hashfunc[4]; /* hash function to use for each key */
        ScanKeyData cc_skey[4];
        struct catcache *cc_next;
        Dllist     *cc_lrulist;         /* LRU list, most recent first */
index 1f3114d..e4e4f04 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2000, PostgreSQL, Inc
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: int8.h,v 1.17 2000/01/26 05:58:38 momjian Exp $
+ * $Id: int8.h,v 1.18 2000/02/21 03:36:59 tgl Exp $
  *
  * NOTES
  * These data types are supported on all 64-bit architectures, and may
@@ -93,9 +93,7 @@ extern int64 *int48(int32 val);
 extern int32 int84(int64 *val);
 
 #ifdef NOT_USED
-extern int64 *int2vector (int16 val);
 extern int16 int82(int64 *val);
-
 #endif
 
 extern float64 i8tod(int64 *val);