OSDN Git Service

Rewrite the rbtree routines so that an RBNode is the first field of the
authorTom Lane <tgl@sss.pgh.pa.us>
Sun, 1 Aug 2010 02:12:51 +0000 (02:12 +0000)
committerTom Lane <tgl@sss.pgh.pa.us>
Sun, 1 Aug 2010 02:12:51 +0000 (02:12 +0000)
struct representing a tree entry, rather than being a separately allocated
piece of storage.  This API is at least as clean as the old one (if not
more so --- there were some bizarre choices in there) and it permits a
very substantial memory savings, on the order of 2X in ginbulk.c's usage.

Also, fix minor memory leaks in code called by ginEntryInsert, in
particular in ginInsertValue and entryFillRoot, as well as ginEntryInsert
itself.  These leaks resulted in the GIN index build context continuing
to bloat even after we'd filled it to maintenance_work_mem and started
to dump data out to the index.

In combination these fixes restore the GIN index build code to honoring
the maintenance_work_mem limit about as well as it did in 8.4.  Speed
seems on par with 8.4 too, maybe even a bit faster, for a non-pathological
case in which HEAD was formerly slower.

Back-patch to 9.0 so we don't have a performance regression from 8.4.

src/backend/access/gin/ginbtree.c
src/backend/access/gin/ginbulk.c
src/backend/access/gin/ginentrypage.c
src/backend/access/gin/ginfast.c
src/backend/access/gin/gininsert.c
src/backend/utils/misc/rbtree.c
src/include/access/gin.h
src/include/utils/rbtree.h

index bb150a4..ed32866 100644 (file)
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *                     $PostgreSQL: pgsql/src/backend/access/gin/ginbtree.c,v 1.15 2010/01/02 16:57:33 momjian Exp $
+ *                     $PostgreSQL: pgsql/src/backend/access/gin/ginbtree.c,v 1.15.6.1 2010/08/01 02:12:51 tgl Exp $
  *-------------------------------------------------------------------------
  */
 
@@ -267,6 +267,8 @@ findParents(GinBtree btree, GinBtreeStack *stack,
 
 /*
  * Insert value (stored in GinBtree) to tree described by stack
+ *
+ * NB: the passed-in stack is freed, as though by freeGinBtreeStack.
  */
 void
 ginInsertValue(GinBtree btree, GinBtreeStack *stack)
@@ -308,10 +310,11 @@ ginInsertValue(GinBtree btree, GinBtreeStack *stack)
                                PageSetTLI(page, ThisTimeLineID);
                        }
 
-                       UnlockReleaseBuffer(stack->buffer);
+                       LockBuffer(stack->buffer, GIN_UNLOCK);
                        END_CRIT_SECTION();
 
-                       freeGinBtreeStack(stack->parent);
+                       freeGinBtreeStack(stack);
+
                        return;
                }
                else
@@ -325,7 +328,6 @@ ginInsertValue(GinBtree btree, GinBtreeStack *stack)
                         */
                        newlpage = btree->splitPage(btree, stack->buffer, rbuffer, stack->off, &rdata);
 
-
                        ((ginxlogSplit *) (rdata->data))->rootBlkno = rootBlkno;
 
                        parent = stack->parent;
@@ -341,7 +343,6 @@ ginInsertValue(GinBtree btree, GinBtreeStack *stack)
                                ((ginxlogSplit *) (rdata->data))->isRootSplit = TRUE;
                                ((ginxlogSplit *) (rdata->data))->rrlink = InvalidBlockNumber;
 
-
                                page = BufferGetPage(stack->buffer);
                                lpage = BufferGetPage(lbuffer);
                                rpage = BufferGetPage(rbuffer);
@@ -375,10 +376,11 @@ ginInsertValue(GinBtree btree, GinBtreeStack *stack)
 
                                UnlockReleaseBuffer(rbuffer);
                                UnlockReleaseBuffer(lbuffer);
-                               UnlockReleaseBuffer(stack->buffer);
-
+                               LockBuffer(stack->buffer, GIN_UNLOCK);
                                END_CRIT_SECTION();
 
+                               freeGinBtreeStack(stack);
+
                                return;
                        }
                        else
index bb726e6..094479c 100644 (file)
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *                     $PostgreSQL: pgsql/src/backend/access/gin/ginbulk.c,v 1.19 2010/02/26 02:00:33 momjian Exp $
+ *                     $PostgreSQL: pgsql/src/backend/access/gin/ginbulk.c,v 1.19.4.1 2010/08/01 02:12:51 tgl Exp $
  *-------------------------------------------------------------------------
  */
 
 #include "utils/memutils.h"
 
 
-#define DEF_NENTRY     2048
-#define DEF_NPTR       4
+#define DEF_NENTRY     2048            /* EntryAccumulator allocation quantum */
+#define DEF_NPTR       5                       /* ItemPointer initial allocation quantum */
 
-static void *
-ginAppendData(void *old, void *new, void *arg)
-{
-       EntryAccumulator *eo = (EntryAccumulator *) old,
-                          *en = (EntryAccumulator *) new;
 
+/* Combiner function for rbtree.c */
+static void
+ginCombineData(RBNode *existing, const RBNode *newdata, void *arg)
+{
+       EntryAccumulator *eo = (EntryAccumulator *) existing;
+       const EntryAccumulator *en = (const EntryAccumulator *) newdata;
        BuildAccumulator *accum = (BuildAccumulator *) arg;
 
+       /*
+        * Note this code assumes that newdata contains only one itempointer.
+        */
        if (eo->number >= eo->length)
        {
                accum->allocatedMemory -= GetMemoryChunkSpace(eo->list);
@@ -53,29 +57,57 @@ ginAppendData(void *old, void *new, void *arg)
 
        eo->list[eo->number] = en->list[0];
        eo->number++;
-
-       return old;
 }
 
+/* Comparator function for rbtree.c */
 static int
-cmpEntryAccumulator(const void *a, const void *b, void *arg)
+cmpEntryAccumulator(const RBNode *a, const RBNode *b, void *arg)
 {
-       EntryAccumulator *ea = (EntryAccumulator *) a;
-       EntryAccumulator *eb = (EntryAccumulator *) b;
+       const EntryAccumulator *ea = (const EntryAccumulator *) a;
+       const EntryAccumulator *eb = (const EntryAccumulator *) b;
        BuildAccumulator *accum = (BuildAccumulator *) arg;
 
        return compareAttEntries(accum->ginstate, ea->attnum, ea->value,
                                                         eb->attnum, eb->value);
 }
 
+/* Allocator function for rbtree.c */
+static RBNode *
+ginAllocEntryAccumulator(void *arg)
+{
+       BuildAccumulator *accum = (BuildAccumulator *) arg;
+       EntryAccumulator *ea;
+
+       /*
+        * Allocate memory by rather big chunks to decrease overhead.  We have
+        * no need to reclaim RBNodes individually, so this costs nothing.
+        */
+       if (accum->entryallocator == NULL || accum->length >= DEF_NENTRY)
+       {
+               accum->entryallocator = palloc(sizeof(EntryAccumulator) * DEF_NENTRY);
+               accum->allocatedMemory += GetMemoryChunkSpace(accum->entryallocator);
+               accum->length = 0;
+       }
+
+       /* Allocate new RBNode from current chunk */
+       ea = accum->entryallocator + accum->length;
+       accum->length++;
+
+       return (RBNode *) ea;
+}
+
 void
 ginInitBA(BuildAccumulator *accum)
 {
        accum->allocatedMemory = 0;
+       accum->length = 0;
        accum->entryallocator = NULL;
-       accum->tree = rb_create(cmpEntryAccumulator, ginAppendData, NULL, accum);
-       accum->iterator = NULL;
-       accum->tmpList = NULL;
+       accum->tree = rb_create(sizeof(EntryAccumulator),
+                                                       cmpEntryAccumulator,
+                                                       ginCombineData,
+                                                       ginAllocEntryAccumulator,
+                                                       NULL,                           /* no freefunc needed */
+                                                       (void *) accum);
 }
 
 /*
@@ -104,55 +136,41 @@ getDatumCopy(BuildAccumulator *accum, OffsetNumber attnum, Datum value)
 static void
 ginInsertEntry(BuildAccumulator *accum, ItemPointer heapptr, OffsetNumber attnum, Datum entry)
 {
-       EntryAccumulator *key,
-                          *ea;
+       EntryAccumulator key;
+       EntryAccumulator *ea;
+       bool            isNew;
 
        /*
-        * Allocate memory by rather big chunk to decrease overhead, we don't keep
-        * pointer to previously allocated chunks because they will free by
-        * MemoryContextReset() call.
+        * For the moment, fill only the fields of key that will be looked at
+        * by cmpEntryAccumulator or ginCombineData.
         */
-       if (accum->entryallocator == NULL || accum->length >= DEF_NENTRY)
-       {
-               accum->entryallocator = palloc(sizeof(EntryAccumulator) * DEF_NENTRY);
-               accum->allocatedMemory += GetMemoryChunkSpace(accum->entryallocator);
-               accum->length = 0;
-       }
+       key.attnum = attnum;
+       key.value = entry;
+       /* temporarily set up single-entry itempointer list */
+       key.list = heapptr;
 
-       /* "Allocate" new key in chunk */
-       key = accum->entryallocator + accum->length;
-       accum->length++;
+       ea = (EntryAccumulator *) rb_insert(accum->tree, (RBNode *) &key, &isNew);
 
-       key->attnum = attnum;
-       key->value = entry;
-       /* To prevent multiple palloc/pfree cycles, we reuse array */
-       if (accum->tmpList == NULL)
-               accum->tmpList =
-                       (ItemPointerData *) palloc(sizeof(ItemPointerData) * DEF_NPTR);
-       key->list = accum->tmpList;
-       key->list[0] = *heapptr;
-
-       ea = rb_insert(accum->tree, key);
-
-       if (ea == NULL)
+       if (isNew)
        {
                /*
-                * The key has been inserted, so continue initialization.
+                * Finish initializing new tree entry, including making permanent
+                * copies of the datum and itempointer.
                 */
-               key->value = getDatumCopy(accum, attnum, entry);
-               key->length = DEF_NPTR;
-               key->number = 1;
-               key->shouldSort = FALSE;
-               accum->allocatedMemory += GetMemoryChunkSpace(key->list);
-               accum->tmpList = NULL;
+               ea->value = getDatumCopy(accum, attnum, entry);
+               ea->length = DEF_NPTR;
+               ea->number = 1;
+               ea->shouldSort = FALSE;
+               ea->list =
+                       (ItemPointerData *) palloc(sizeof(ItemPointerData) * DEF_NPTR);
+               ea->list[0] = *heapptr;
+               accum->allocatedMemory += GetMemoryChunkSpace(ea->list);
        }
        else
        {
                /*
-                * The key has been appended, so "free" allocated key by decrementing
-                * chunk's counter.
+                * ginCombineData did everything needed.
                 */
-               accum->length--;
        }
 }
 
@@ -214,16 +232,20 @@ qsortCompareItemPointers(const void *a, const void *b)
        return res;
 }
 
+/* Prepare to read out the rbtree contents using ginGetEntry */
+void
+ginBeginBAScan(BuildAccumulator *accum)
+{
+       rb_begin_iterate(accum->tree, LeftRightWalk);
+}
+
 ItemPointerData *
 ginGetEntry(BuildAccumulator *accum, OffsetNumber *attnum, Datum *value, uint32 *n)
 {
        EntryAccumulator *entry;
        ItemPointerData *list;
 
-       if (accum->iterator == NULL)
-               accum->iterator = rb_begin_iterate(accum->tree, LeftRightWalk);
-
-       entry = rb_iterate(accum->iterator);
+       entry = (EntryAccumulator *) rb_iterate(accum->tree);
 
        if (entry == NULL)
                return NULL;
index 6d307c8..1896e09 100644 (file)
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *                     $PostgreSQL: pgsql/src/backend/access/gin/ginentrypage.c,v 1.24 2010/02/26 02:00:33 momjian Exp $
+ *                     $PostgreSQL: pgsql/src/backend/access/gin/ginentrypage.c,v 1.24.4.1 2010/08/01 02:12:51 tgl Exp $
  *-------------------------------------------------------------------------
  */
 
@@ -615,7 +615,7 @@ entrySplitPage(GinBtree btree, Buffer lbuf, Buffer rbuf, OffsetNumber off, XLogR
 }
 
 /*
- * return newly allocate rightmost tuple
+ * return newly allocated rightmost tuple
  */
 IndexTuple
 ginPageGetLinkItup(Buffer buf)
@@ -646,10 +646,12 @@ entryFillRoot(GinBtree btree, Buffer root, Buffer lbuf, Buffer rbuf)
        itup = ginPageGetLinkItup(lbuf);
        if (PageAddItem(page, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber)
                elog(ERROR, "failed to add item to index root page");
+       pfree(itup);
 
        itup = ginPageGetLinkItup(rbuf);
        if (PageAddItem(page, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber)
                elog(ERROR, "failed to add item to index root page");
+       pfree(itup);
 }
 
 void
index f8e0b5a..7aaab3a 100644 (file)
@@ -11,7 +11,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *                     $PostgreSQL: pgsql/src/backend/access/gin/ginfast.c,v 1.7 2010/02/11 14:29:50 teodor Exp $
+ *                     $PostgreSQL: pgsql/src/backend/access/gin/ginfast.c,v 1.7.6.1 2010/08/01 02:12:51 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -786,6 +786,7 @@ ginInsertCleanup(Relation index, GinState *ginstate,
                         * significant amount of time - so, run it without locking pending
                         * list.
                         */
+                       ginBeginBAScan(&accum);
                        while ((list = ginGetEntry(&accum, &attnum, &entry, &nlist)) != NULL)
                        {
                                ginEntryInsert(index, ginstate, attnum, entry, list, nlist, FALSE);
@@ -820,6 +821,7 @@ ginInsertCleanup(Relation index, GinState *ginstate,
                                ginInitBA(&accum);
                                processPendingPage(&accum, &datums, page, maxoff + 1);
 
+                               ginBeginBAScan(&accum);
                                while ((list = ginGetEntry(&accum, &attnum, &entry, &nlist)) != NULL)
                                        ginEntryInsert(index, ginstate, attnum, entry, list, nlist, FALSE);
                        }
index e2a5e8b..951d616 100644 (file)
@@ -8,7 +8,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *                     $PostgreSQL: pgsql/src/backend/access/gin/gininsert.c,v 1.26 2010/02/11 14:29:50 teodor Exp $
+ *                     $PostgreSQL: pgsql/src/backend/access/gin/gininsert.c,v 1.26.6.1 2010/08/01 02:12:51 tgl Exp $
  *-------------------------------------------------------------------------
  */
 
@@ -176,6 +176,7 @@ ginEntryInsert(Relation index, GinState *ginstate,
                        gdi = prepareScanPostingTree(index, rootPostingTree, FALSE);
                        gdi->btree.isBuild = isBuild;
                        insertItemPointer(gdi, items, nitem);
+                       pfree(gdi);
 
                        return;
                }
@@ -254,6 +255,7 @@ ginBuildCallback(Relation index, HeapTuple htup, Datum *values,
                uint32          nlist;
                OffsetNumber attnum;
 
+               ginBeginBAScan(&buildstate->accum);
                while ((list = ginGetEntry(&buildstate->accum, &attnum, &entry, &nlist)) != NULL)
                {
                        /* there could be many entries, so be willing to abort here */
@@ -360,6 +362,7 @@ ginbuild(PG_FUNCTION_ARGS)
 
        /* dump remaining entries to the index */
        oldCtx = MemoryContextSwitchTo(buildstate.tmpCtx);
+       ginBeginBAScan(&buildstate.accum);
        while ((list = ginGetEntry(&buildstate.accum, &attnum, &entry, &nlist)) != NULL)
        {
                /* there could be many entries, so be willing to abort here */
index b5da48d..be50880 100644 (file)
  * longest path from root to leaf is only about twice as long as the shortest,
  * so lookups are guaranteed to run in O(lg n) time.
  *
- * Copyright (c) 1996-2009, PostgreSQL Global Development Group
+ * Copyright (c) 2009-2010, PostgreSQL Global Development Group
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/misc/rbtree.c,v 1.3 2010/02/26 02:01:14 momjian Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/misc/rbtree.c,v 1.3.4.1 2010/08/01 02:12:51 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
 
 #include "utils/rbtree.h"
 
-/**********************************************************************
- *                                              Declarations                                                             *
- **********************************************************************/
 
 /*
- * Values for RBNode->iteratorState
+ * Values of RBNode.iteratorState
+ *
+ * Note that iteratorState has an undefined value except in nodes that are
+ * currently being visited by an active iteration.
  */
 #define InitialState   (0)
 #define FirstStepDone  (1)
 #define ThirdStepDone  (3)
 
 /*
- * Colors of node
+ * Colors of nodes (values of RBNode.color)
  */
 #define RBBLACK                (0)
 #define RBRED          (1)
 
-typedef struct RBNode
-{
-       uint32          iteratorState:2,
-       color:          1,
-                               unused:29;
-       struct RBNode *left;
-       struct RBNode *right;
-       struct RBNode *parent;
-       void       *data;
-} RBNode;
-
+/*
+ * RBTree control structure
+ */
 struct RBTree
 {
-       RBNode     *root;
+       RBNode     *root;                       /* root node, or RBNIL if tree is empty */
+
+       /* Iteration state */
+       RBNode     *cur;                        /* current iteration node */
+       RBNode     *(*iterate) (RBTree *rb);
+
+       /* Remaining fields are constant after rb_create */
+
+       Size            node_size;              /* actual size of tree nodes */
+       /* The caller-supplied manipulation functions */
        rb_comparator comparator;
-       rb_appendator appendator;
+       rb_combiner combiner;
+       rb_allocfunc allocfunc;
        rb_freefunc freefunc;
+       /* Passthrough arg passed to all manipulation functions */
        void       *arg;
 };
 
-struct RBTreeIterator
-{
-       RBNode     *node;
-       void       *(*iterate) (RBTreeIterator *iterator);
-};
-
 /*
  * all leafs are sentinels, use customized NIL name to prevent
- * collision with sytem-wide NIL which is actually NULL
+ * collision with system-wide constant NIL which is actually NULL
  */
-#define RBNIL &sentinel
+#define RBNIL (&sentinel)
 
-RBNode         sentinel = {InitialState, RBBLACK, 0, RBNIL, RBNIL, NULL, NULL};
+static RBNode  sentinel = {InitialState, RBBLACK, RBNIL, RBNIL, NULL};
 
-/**********************************************************************
- *                                               Create                                                                          *
- **********************************************************************/
 
+/*
+ * rb_create: create an empty RBTree
+ *
+ * Arguments are:
+ *     node_size: actual size of tree nodes (> sizeof(RBNode))
+ *     The manipulation functions:
+ *     comparator: compare two RBNodes for less/equal/greater
+ *     combiner: merge an existing tree entry with a new one
+ *     allocfunc: allocate a new RBNode
+ *     freefunc: free an old RBNode
+ *     arg: passthrough pointer that will be passed to the manipulation functions
+ *
+ * Note that the combiner's righthand argument will be a "proposed" tree node,
+ * ie the input to rb_insert, in which the RBNode fields themselves aren't
+ * valid.  Similarly, either input to the comparator may be a "proposed" node.
+ * This shouldn't matter since the functions aren't supposed to look at the
+ * RBNode fields, only the extra fields of the struct the RBNode is embedded
+ * in.
+ *
+ * The freefunc should just be pfree or equivalent; it should NOT attempt
+ * to free any subsidiary data, because the node passed to it may not contain
+ * valid data!  freefunc can be NULL if caller doesn't require retail
+ * space reclamation.
+ *
+ * The RBTree node is palloc'd in the caller's memory context.  Note that
+ * all contents of the tree are actually allocated by the caller, not here.
+ *
+ * Since tree contents are managed by the caller, there is currently not
+ * an explicit "destroy" operation; typically a tree would be freed by
+ * resetting or deleting the memory context it's stored in.  You can pfree
+ * the RBTree node if you feel the urge.
+ */
 RBTree *
-rb_create(rb_comparator comparator, rb_appendator appendator,
-                 rb_freefunc freefunc, void *arg)
+rb_create(Size node_size,
+                 rb_comparator comparator,
+                 rb_combiner combiner,
+                 rb_allocfunc allocfunc,
+                 rb_freefunc freefunc,
+                 void *arg)
 {
-       RBTree     *tree = palloc(sizeof(RBTree));
+       RBTree     *tree = (RBTree *) palloc(sizeof(RBTree));
+
+       Assert(node_size > sizeof(RBNode));
 
        tree->root = RBNIL;
+       tree->cur = RBNIL;
+       tree->iterate = NULL;
+       tree->node_size = node_size;
        tree->comparator = comparator;
-       tree->appendator = appendator;
+       tree->combiner = combiner;
+       tree->allocfunc = allocfunc;
        tree->freefunc = freefunc;
-
        tree->arg = arg;
 
        return tree;
 }
 
+/* Copy the additional data fields from one RBNode to another */
+static inline void
+rb_copy_data(RBTree *rb, RBNode *dest, const RBNode *src)
+{
+       memcpy(dest + 1, src + 1, rb->node_size - sizeof(RBNode));
+}
+
 /**********************************************************************
  *                                               Search                                                                          *
  **********************************************************************/
 
-void *
-rb_find(RBTree *rb, void *data)
+/*
+ * rb_find: search for a value in an RBTree
+ *
+ * data represents the value to try to find.  Its RBNode fields need not
+ * be valid, it's the extra data in the larger struct that is of interest.
+ *
+ * Returns the matching tree entry, or NULL if no match is found.
+ */
+RBNode *
+rb_find(RBTree *rb, const RBNode *data)
 {
        RBNode     *node = rb->root;
-       int                     cmp;
 
        while (node != RBNIL)
        {
-               cmp = rb->comparator(data, node->data, rb->arg);
+               int             cmp = rb->comparator(data, node, rb->arg);
 
                if (cmp == 0)
-                       return node->data;
+                       return node;
                else if (cmp < 0)
                        node = node->left;
                else
@@ -125,6 +174,32 @@ rb_find(RBTree *rb, void *data)
        return NULL;
 }
 
+/*
+ * rb_leftmost: fetch the leftmost (smallest-valued) tree node.
+ * Returns NULL if tree is empty.
+ *
+ * Note: in the original implementation this included an unlink step, but
+ * that's a bit awkward.  Just call rb_delete on the result if that's what
+ * you want.
+ */
+RBNode *
+rb_leftmost(RBTree *rb)
+{
+       RBNode     *node = rb->root;
+       RBNode     *leftmost = rb->root;
+
+       while (node != RBNIL)
+       {
+               leftmost = node;
+               node = node->left;
+       }
+
+       if (leftmost != RBNIL)
+               return leftmost;
+
+       return NULL;
+}
+
 /**********************************************************************
  *                                                       Insertion                                                               *
  **********************************************************************/
@@ -309,13 +384,24 @@ rb_insert_fixup(RBTree *rb, RBNode *x)
 }
 
 /*
- * Allocate node for data and insert in tree.
+ * rb_insert: insert a new value into the tree.
  *
- * Return old data (or result of appendator method) if it exists and NULL
- * otherwise.
+ * data represents the value to insert.  Its RBNode fields need not
+ * be valid, it's the extra data in the larger struct that is of interest.
+ *
+ * If the value represented by "data" is not present in the tree, then
+ * we copy "data" into a new tree entry and return that node, setting *isNew
+ * to true.
+ *
+ * If the value represented by "data" is already present, then we call the
+ * combiner function to merge data into the existing node, and return the
+ * existing node, setting *isNew to false.
+ *
+ * "data" is unmodified in either case; it's typically just a local
+ * variable in the caller.
  */
-void *
-rb_insert(RBTree *rb, void *data)
+RBNode *
+rb_insert(RBTree *rb, const RBNode *data, bool *isNew)
 {
        RBNode     *current,
                           *parent,
@@ -325,43 +411,37 @@ rb_insert(RBTree *rb, void *data)
        /* find where node belongs */
        current = rb->root;
        parent = NULL;
-       cmp = 0;
+       cmp = 0;                                        /* just to prevent compiler warning */
+
        while (current != RBNIL)
        {
-               cmp = rb->comparator(data, current->data, rb->arg);
+               cmp = rb->comparator(data, current, rb->arg);
                if (cmp == 0)
                {
                        /*
-                        * Found node with given key.  If appendator method is provided,
-                        * call it to join old and new data; else, new data replaces old
-                        * data.
+                        * Found node with given key.  Apply combiner.
                         */
-                       if (rb->appendator)
-                       {
-                               current->data = rb->appendator(current->data, data, rb->arg);
-                               return current->data;
-                       }
-                       else
-                       {
-                               void       *old = current->data;
-
-                               current->data = data;
-                               return old;
-                       }
+                       rb->combiner(current, data, rb->arg);
+                       *isNew = false;
+                       return current;
                }
                parent = current;
                current = (cmp < 0) ? current->left : current->right;
        }
 
-       /* setup new node in tree */
-       x = palloc(sizeof(RBNode));
-       x->data = data;
-       x->parent = parent;
-       x->left = RBNIL;
-       x->right = RBNIL;
-       x->color = RBRED;
+       /*
+        * Value is not present, so create a new node containing data.
+        */
+       *isNew = true;
+
+       x = rb->allocfunc(rb->arg);
 
        x->iteratorState = InitialState;
+       x->color = RBRED;
+       x->left = RBNIL;
+       x->right = RBNIL;
+       x->parent = parent;
+       rb_copy_data(rb, x, data);
 
        /* insert node in tree */
        if (parent)
@@ -377,7 +457,8 @@ rb_insert(RBTree *rb, void *data)
        }
 
        rb_insert_fixup(rb, x);
-       return NULL;
+
+       return x;
 }
 
 /**********************************************************************
@@ -533,11 +614,11 @@ rb_delete_node(RBTree *rb, RBNode *z)
        }
 
        /*
-        * If we removed the tree successor of z rather than z itself, then attach
+        * If we removed the tree successor of z rather than z itself, then move
         * the data for the removed node to the one we were supposed to remove.
         */
        if (y != z)
-               z->data = y->data;
+               rb_copy_data(rb, z, y);
 
        /*
         * Removing a black node might make some paths from root to leaf contain
@@ -546,260 +627,245 @@ rb_delete_node(RBTree *rb, RBNode *z)
        if (y->color == RBBLACK)
                rb_delete_fixup(rb, x);
 
-       pfree(y);
-}
-
-extern void
-rb_delete(RBTree *rb, void *data)
-{
-       RBNode     *node = rb->root;
-       int                     cmp;
-
-       while (node != RBNIL)
-       {
-               cmp = rb->comparator(data, node->data, rb->arg);
-
-               if (cmp == 0)
-               {
-                       /* found node to delete */
-                       if (rb->freefunc)
-                               rb->freefunc (node->data);
-
-                       node->data = NULL;
-                       rb_delete_node(rb, node);
-                       return;
-               }
-               else if (cmp < 0)
-                       node = node->left;
-               else
-                       node = node->right;
-       }
+       /* Now we can recycle the y node */
+       if (rb->freefunc)
+               rb->freefunc(y, rb->arg);
 }
 
 /*
- * Return data on left most node and delete
- * that node
+ * rb_delete: remove the given tree entry
+ *
+ * "node" must have previously been found via rb_find or rb_leftmost.
+ * It is caller's responsibility to free any subsidiary data attached
+ * to the node before calling rb_delete.  (Do *not* try to push that
+ * responsibility off to the freefunc, as some other physical node
+ * may be the one actually freed!)
  */
-extern void *
-rb_leftmost(RBTree *rb)
+void
+rb_delete(RBTree *rb, RBNode *node)
 {
-       RBNode     *node = rb->root;
-       RBNode     *leftmost = rb->root;
-       void       *res = NULL;
-
-       while (node != RBNIL)
-       {
-               leftmost = node;
-               node = node->left;
-       }
-
-       if (leftmost != RBNIL)
-       {
-               res = leftmost->data;
-               leftmost->data = NULL;
-               rb_delete_node(rb, leftmost);
-       }
-
-       return res;
+       rb_delete_node(rb, node);
 }
 
 /**********************************************************************
  *                                               Traverse                                                                        *
  **********************************************************************/
 
-static void *
-rb_next_node(RBTreeIterator *iterator, RBNode *node)
-{
-       node->iteratorState = InitialState;
-       iterator->node = node;
-       return iterator->iterate(iterator);
-}
-
-static void *
-rb_left_right_iterator(RBTreeIterator *iterator)
+/*
+ * The iterator routines were originally coded in tail-recursion style,
+ * which is nice to look at, but is trouble if your compiler isn't smart
+ * enough to optimize it.  Now we just use looping.
+ */
+#define descend(next_node) \
+       do { \
+               (next_node)->iteratorState = InitialState; \
+               node = rb->cur = (next_node); \
+               goto restart; \
+       } while (0)
+
+#define ascend(next_node) \
+       do { \
+               node = rb->cur = (next_node); \
+               goto restart; \
+       } while (0)
+
+
+static RBNode *
+rb_left_right_iterator(RBTree *rb)
 {
-       RBNode     *node = iterator->node;
+       RBNode     *node = rb->cur;
 
+restart:
        switch (node->iteratorState)
        {
                case InitialState:
                        if (node->left != RBNIL)
                        {
                                node->iteratorState = FirstStepDone;
-                               return rb_next_node(iterator, node->left);
+                               descend(node->left);
                        }
+                       /* FALL THROUGH */
                case FirstStepDone:
                        node->iteratorState = SecondStepDone;
-                       return node->data;
+                       return node;
                case SecondStepDone:
                        if (node->right != RBNIL)
                        {
                                node->iteratorState = ThirdStepDone;
-                               return rb_next_node(iterator, node->right);
+                               descend(node->right);
                        }
+                       /* FALL THROUGH */
                case ThirdStepDone:
                        if (node->parent)
-                       {
-                               iterator->node = node->parent;
-                               return iterator->iterate(iterator);
-                       }
+                               ascend(node->parent);
                        break;
                default:
-                       elog(ERROR, "Unknow node state: %d", node->iteratorState);
+                       elog(ERROR, "unrecognized rbtree node state: %d",
+                                node->iteratorState);
        }
 
        return NULL;
 }
 
-static void *
-rb_right_left_iterator(RBTreeIterator *iterator)
+static RBNode *
+rb_right_left_iterator(RBTree *rb)
 {
-       RBNode     *node = iterator->node;
+       RBNode     *node = rb->cur;
 
+restart:
        switch (node->iteratorState)
        {
                case InitialState:
                        if (node->right != RBNIL)
                        {
                                node->iteratorState = FirstStepDone;
-                               return rb_next_node(iterator, node->right);
+                               descend(node->right);
                        }
+                       /* FALL THROUGH */
                case FirstStepDone:
                        node->iteratorState = SecondStepDone;
-                       return node->data;
+                       return node;
                case SecondStepDone:
                        if (node->left != RBNIL)
                        {
                                node->iteratorState = ThirdStepDone;
-                               return rb_next_node(iterator, node->left);
+                               descend(node->left);
                        }
+                       /* FALL THROUGH */
                case ThirdStepDone:
                        if (node->parent)
-                       {
-                               iterator->node = node->parent;
-                               return iterator->iterate(iterator);
-                       }
+                               ascend(node->parent);
                        break;
                default:
-                       elog(ERROR, "Unknow node state: %d", node->iteratorState);
+                       elog(ERROR, "unrecognized rbtree node state: %d",
+                                node->iteratorState);
        }
 
        return NULL;
 }
 
-static void *
-rb_direct_iterator(RBTreeIterator *iterator)
+static RBNode *
+rb_direct_iterator(RBTree *rb)
 {
-       RBNode     *node = iterator->node;
+       RBNode     *node = rb->cur;
 
+restart:
        switch (node->iteratorState)
        {
                case InitialState:
                        node->iteratorState = FirstStepDone;
-                       return node->data;
+                       return node;
                case FirstStepDone:
                        if (node->left != RBNIL)
                        {
                                node->iteratorState = SecondStepDone;
-                               return rb_next_node(iterator, node->left);
+                               descend(node->left);
                        }
+                       /* FALL THROUGH */
                case SecondStepDone:
                        if (node->right != RBNIL)
                        {
                                node->iteratorState = ThirdStepDone;
-                               return rb_next_node(iterator, node->right);
+                               descend(node->right);
                        }
+                       /* FALL THROUGH */
                case ThirdStepDone:
                        if (node->parent)
-                       {
-                               iterator->node = node->parent;
-                               return iterator->iterate(iterator);
-                       }
+                               ascend(node->parent);
                        break;
                default:
-                       elog(ERROR, "Unknow node state: %d", node->iteratorState);
+                       elog(ERROR, "unrecognized rbtree node state: %d",
+                                node->iteratorState);
        }
 
        return NULL;
 }
 
-static void *
-rb_inverted_iterator(RBTreeIterator *iterator)
+static RBNode *
+rb_inverted_iterator(RBTree *rb)
 {
-       RBNode     *node = iterator->node;
+       RBNode     *node = rb->cur;
 
+restart:
        switch (node->iteratorState)
        {
                case InitialState:
                        if (node->left != RBNIL)
                        {
                                node->iteratorState = FirstStepDone;
-                               return rb_next_node(iterator, node->left);
+                               descend(node->left);
                        }
+                       /* FALL THROUGH */
                case FirstStepDone:
                        if (node->right != RBNIL)
                        {
                                node->iteratorState = SecondStepDone;
-                               return rb_next_node(iterator, node->right);
+                               descend(node->right);
                        }
+                       /* FALL THROUGH */
                case SecondStepDone:
                        node->iteratorState = ThirdStepDone;
-                       return node->data;
+                       return node;
                case ThirdStepDone:
                        if (node->parent)
-                       {
-                               iterator->node = node->parent;
-                               return iterator->iterate(iterator);
-                       }
+                               ascend(node->parent);
                        break;
                default:
-                       elog(ERROR, "Unknow node state: %d", node->iteratorState);
+                       elog(ERROR, "unrecognized rbtree node state: %d",
+                                node->iteratorState);
        }
 
        return NULL;
 }
 
-RBTreeIterator *
+/*
+ * rb_begin_iterate: prepare to traverse the tree in any of several orders
+ *
+ * After calling rb_begin_iterate, call rb_iterate repeatedly until it
+ * returns NULL or the traversal stops being of interest.
+ *
+ * If the tree is changed during traversal, results of further calls to
+ * rb_iterate are unspecified.
+ *
+ * Note: this used to return a separately palloc'd iterator control struct,
+ * but that's a bit pointless since the data structure is incapable of
+ * supporting multiple concurrent traversals.  Now we just keep the state
+ * in RBTree.
+ */
+void
 rb_begin_iterate(RBTree *rb, RBOrderControl ctrl)
 {
-       RBTreeIterator *iterator = palloc(sizeof(RBTreeIterator));
-
-       iterator->node = rb->root;
-       if (iterator->node != RBNIL)
-               iterator->node->iteratorState = InitialState;
+       rb->cur = rb->root;
+       if (rb->cur != RBNIL)
+               rb->cur->iteratorState = InitialState;
 
        switch (ctrl)
        {
                case LeftRightWalk:             /* visit left, then self, then right */
-                       iterator->iterate = rb_left_right_iterator;
+                       rb->iterate = rb_left_right_iterator;
                        break;
                case RightLeftWalk:             /* visit right, then self, then left */
-                       iterator->iterate = rb_right_left_iterator;
+                       rb->iterate = rb_right_left_iterator;
                        break;
                case DirectWalk:                /* visit self, then left, then right */
-                       iterator->iterate = rb_direct_iterator;
+                       rb->iterate = rb_direct_iterator;
                        break;
                case InvertedWalk:              /* visit left, then right, then self */
-                       iterator->iterate = rb_inverted_iterator;
+                       rb->iterate = rb_inverted_iterator;
                        break;
                default:
-                       elog(ERROR, "Unknown iterator order: %d", ctrl);
+                       elog(ERROR, "unrecognized rbtree iteration order: %d", ctrl);
        }
-
-       return iterator;
 }
 
-void *
-rb_iterate(RBTreeIterator *iterator)
+/*
+ * rb_iterate: return the next node in traversal order, or NULL if no more
+ */
+RBNode *
+rb_iterate(RBTree *rb)
 {
-       if (iterator->node == RBNIL)
+       if (rb->cur == RBNIL)
                return NULL;
 
-       return iterator->iterate(iterator);
-}
-
-void
-rb_free_iterator(RBTreeIterator *iterator)
-{
-       pfree(iterator);
+       return rb->iterate(rb);
 }
index cd33832..07eb373 100644 (file)
@@ -4,7 +4,7 @@
  *
  *     Copyright (c) 2006-2010, PostgreSQL Global Development Group
  *
- *     $PostgreSQL: pgsql/src/include/access/gin.h,v 1.38.4.1 2010/07/31 00:31:04 tgl Exp $
+ *     $PostgreSQL: pgsql/src/include/access/gin.h,v 1.38.4.2 2010/08/01 02:12:51 tgl Exp $
  *--------------------------------------------------------------------------
  */
 #ifndef GIN_H
@@ -565,6 +565,7 @@ extern Datum ginarrayconsistent(PG_FUNCTION_ARGS);
 /* ginbulk.c */
 typedef struct EntryAccumulator
 {
+       RBNode          rbnode;
        Datum           value;
        uint32          length;
        uint32          number;
@@ -579,15 +580,14 @@ typedef struct
        long            allocatedMemory;
        uint32          length;
        EntryAccumulator *entryallocator;
-       ItemPointerData *tmpList;
        RBTree     *tree;
-       RBTreeIterator *iterator;
 } BuildAccumulator;
 
 extern void ginInitBA(BuildAccumulator *accum);
 extern void ginInsertRecordBA(BuildAccumulator *accum,
                                  ItemPointer heapptr,
                                  OffsetNumber attnum, Datum *entries, int32 nentry);
+extern void ginBeginBAScan(BuildAccumulator *accum);
 extern ItemPointerData *ginGetEntry(BuildAccumulator *accum, OffsetNumber *attnum, Datum *entry, uint32 *n);
 
 /* ginfast.c */
index 77abff8..c2203f1 100644 (file)
@@ -3,44 +3,64 @@
  * rbtree.h
  *       interface for PostgreSQL generic Red-Black binary tree package
  *
- * Copyright (c) 1996-2009, PostgreSQL Global Development Group
+ * Copyright (c) 2009-2010, PostgreSQL Global Development Group
  *
  * IDENTIFICATION
- *             $PostgreSQL: pgsql/src/include/utils/rbtree.h,v 1.3 2010/05/11 18:14:01 rhaas Exp $
+ *             $PostgreSQL: pgsql/src/include/utils/rbtree.h,v 1.3.2.1 2010/08/01 02:12:51 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
-
 #ifndef RBTREE_H
 #define RBTREE_H
 
+/*
+ * RBNode is intended to be used as the first field of a larger struct,
+ * whose additional fields carry whatever payload data the caller needs
+ * for a tree entry.  (The total size of that larger struct is passed to
+ * rb_create.)  RBNode is declared here to support this usage, but
+ * callers must treat it as an opaque struct.
+ */
+typedef struct RBNode
+{
+       char            iteratorState;  /* workspace for iterating through tree */
+       char            color;                  /* node's current color, red or black */
+       struct RBNode *left;            /* left child, or RBNIL if none */
+       struct RBNode *right;           /* right child, or RBNIL if none */
+       struct RBNode *parent;          /* parent, or NULL (not RBNIL!) if none */
+} RBNode;
+
+/* Opaque struct representing a whole tree */
 typedef struct RBTree RBTree;
-typedef struct RBTreeIterator RBTreeIterator;
 
-typedef int (*rb_comparator) (const void *a, const void *b, void *arg);
-typedef void *(*rb_appendator) (void *currentdata, void *newval, void *arg);
-typedef void (*rb_freefunc) (void *a);
+/* Available tree iteration orderings */
+typedef enum RBOrderControl
+{
+       LeftRightWalk,                          /* inorder: left child, node, right child */
+       RightLeftWalk,                          /* reverse inorder: right, node, left */
+       DirectWalk,                                     /* preorder: node, left child, right child */
+       InvertedWalk                            /* postorder: left child, right child, node */
+} RBOrderControl;
+
+/* Support functions to be provided by caller */
+typedef int (*rb_comparator) (const RBNode *a, const RBNode *b, void *arg);
+typedef void (*rb_combiner) (RBNode *existing, const RBNode *newdata, void *arg);
+typedef RBNode *(*rb_allocfunc) (void *arg);
+typedef void (*rb_freefunc) (RBNode *x, void *arg);
 
-extern RBTree *rb_create(rb_comparator comparator,
-                 rb_appendator appendator,
+extern RBTree *rb_create(Size node_size,
+                 rb_comparator comparator,
+                 rb_combiner combiner,
+                 rb_allocfunc allocfunc,
                  rb_freefunc freefunc,
                  void *arg);
 
-extern void *rb_find(RBTree *rb, void *data);
-extern void *rb_insert(RBTree *rb, void *data);
-extern void rb_delete(RBTree *rb, void *data);
-extern void *rb_leftmost(RBTree *rb);
+extern RBNode *rb_find(RBTree *rb, const RBNode *data);
+extern RBNode *rb_leftmost(RBTree *rb);
 
-typedef enum RBOrderControl
-{
-       LeftRightWalk,
-       RightLeftWalk,
-       DirectWalk,
-       InvertedWalk
-} RBOrderControl;
+extern RBNode *rb_insert(RBTree *rb, const RBNode *data, bool *isNew);
+extern void rb_delete(RBTree *rb, RBNode *node);
 
-extern RBTreeIterator *rb_begin_iterate(RBTree *rb, RBOrderControl ctrl);
-extern void *rb_iterate(RBTreeIterator *iterator);
-extern void rb_free_iterator(RBTreeIterator *iterator);
+extern void rb_begin_iterate(RBTree *rb, RBOrderControl ctrl);
+extern RBNode *rb_iterate(RBTree *rb);
 
-#endif
+#endif /* RBTREE_H */