From 554506871bd3a73a5d9fa4ee3aa0b0fbf406f8ab Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 3 Dec 2010 20:52:18 -0500 Subject: [PATCH] KNNGIST, otherwise known as order-by-operator support for GIST. This commit represents a rather heavily editorialized version of Teodor's builtin_knngist_itself-0.8.2 and builtin_knngist_proc-0.8.1 patches. I redid the opclass API to add a separate Distance method instead of turning the Consistent method into an illogical mess, fixed some bit-rot in the rbtree interfaces, and generally worked over the code style and comments. There's still no non-code documentation to speak of, but I'll work on that separately. Some contrib-module changes are also yet to come (right now, point <-> point is the only KNN-ified operator). Teodor Sigaev and Tom Lane --- src/backend/access/gist/gist.c | 10 + src/backend/access/gist/gistget.c | 819 +++++++++++++++-------------- src/backend/access/gist/gistproc.c | 97 +++- src/backend/access/gist/gistscan.c | 179 +++++-- src/include/access/gist.h | 10 +- src/include/access/gist_private.h | 117 +++-- src/include/catalog/catversion.h | 2 +- src/include/catalog/pg_am.h | 2 +- src/include/catalog/pg_amop.h | 1 + src/include/catalog/pg_amproc.h | 1 + src/include/catalog/pg_proc.h | 4 +- src/include/utils/geo_decls.h | 1 + src/test/regress/expected/create_index.out | 136 +++++ src/test/regress/expected/opr_sanity.out | 29 +- src/test/regress/sql/create_index.sql | 36 ++ src/test/regress/sql/opr_sanity.sql | 26 +- 16 files changed, 972 insertions(+), 498 deletions(-) diff --git a/src/backend/access/gist/gist.c b/src/backend/access/gist/gist.c index 8c2dbc940f..d6aaea2162 100644 --- a/src/backend/access/gist/gist.c +++ b/src/backend/access/gist/gist.c @@ -1030,6 +1030,9 @@ gistnewroot(Relation r, Buffer buffer, IndexTuple *itup, int len, ItemPointer ke END_CRIT_SECTION(); } +/* + * Fill a GISTSTATE with information about the index + */ void initGISTstate(GISTSTATE *giststate, Relation index) { @@ -1064,6 +1067,13 @@ initGISTstate(GISTSTATE *giststate, Relation index) fmgr_info_copy(&(giststate->equalFn[i]), index_getprocinfo(index, i + 1, GIST_EQUAL_PROC), CurrentMemoryContext); + /* opclasses are not required to provide a Distance method */ + if (OidIsValid(index_getprocid(index, i + 1, GIST_DISTANCE_PROC))) + fmgr_info_copy(&(giststate->distanceFn[i]), + index_getprocinfo(index, i + 1, GIST_DISTANCE_PROC), + CurrentMemoryContext); + else + giststate->distanceFn[i].fn_oid = InvalidOid; } } diff --git a/src/backend/access/gist/gistget.c b/src/backend/access/gist/gistget.c index fab13eed52..f0418a08af 100644 --- a/src/backend/access/gist/gistget.c +++ b/src/backend/access/gist/gistget.c @@ -20,504 +20,553 @@ #include "miscadmin.h" #include "pgstat.h" #include "storage/bufmgr.h" +#include "utils/builtins.h" #include "utils/memutils.h" -static OffsetNumber gistfindnext(IndexScanDesc scan, OffsetNumber n); -static int64 gistnext(IndexScanDesc scan, TIDBitmap *tbm); -static bool gistindex_keytest(IndexTuple tuple, IndexScanDesc scan, - OffsetNumber offset); - -static void -killtuple(Relation r, GISTScanOpaque so, ItemPointer iptr) +/* + * gistindex_keytest() -- does this index tuple satisfy the scan key(s)? + * + * The index tuple might represent either a heap tuple or a lower index page, + * depending on whether the containing page is a leaf page or not. + * + * On success return for a heap tuple, *recheck_p is set to indicate + * whether recheck is needed. We recheck if any of the consistent() functions + * request it. recheck is not interesting when examining a non-leaf entry, + * since we must visit the lower index page if there's any doubt. + * + * If we are doing an ordered scan, so->distances[] is filled with distance + * data from the distance() functions before returning success. + * + * We must decompress the key in the IndexTuple before passing it to the + * sk_funcs (which actually are the opclass Consistent or Distance methods). + * + * Note that this function is always invoked in a short-lived memory context, + * so we don't need to worry about cleaning up allocated memory, either here + * or in the implementation of any Consistent or Distance methods. + */ +static bool +gistindex_keytest(IndexScanDesc scan, + IndexTuple tuple, + Page page, + OffsetNumber offset, + bool *recheck_p) { - Page p; - OffsetNumber offset; + GISTScanOpaque so = (GISTScanOpaque) scan->opaque; + GISTSTATE *giststate = so->giststate; + ScanKey key = scan->keyData; + int keySize = scan->numberOfKeys; + double *distance_p; + Relation r = scan->indexRelation; - LockBuffer(so->curbuf, GIST_SHARE); - gistcheckpage(r, so->curbuf); - p = (Page) BufferGetPage(so->curbuf); + *recheck_p = false; - if (XLByteEQ(so->stack->lsn, PageGetLSN(p))) + /* + * If it's a leftover invalid tuple from pre-9.1, treat it as a match + * with minimum possible distances. This means we'll always follow it + * to the referenced page. + */ + if (GistTupleIsInvalid(tuple)) { - /* page unchanged, so all is simple */ - offset = ItemPointerGetOffsetNumber(iptr); - ItemIdMarkDead(PageGetItemId(p, offset)); - SetBufferCommitInfoNeedsSave(so->curbuf); + int i; + + for (i = 0; i < scan->numberOfOrderBys; i++) + so->distances[i] = -get_float8_infinity(); + *recheck_p = true; /* probably unnecessary */ + return true; } - else + + /* Check whether it matches according to the Consistent functions */ + while (keySize > 0) { - OffsetNumber maxoff = PageGetMaxOffsetNumber(p); + Datum datum; + bool isNull; - for (offset = FirstOffsetNumber; offset <= maxoff; offset = OffsetNumberNext(offset)) - { - IndexTuple ituple = (IndexTuple) PageGetItem(p, PageGetItemId(p, offset)); + datum = index_getattr(tuple, + key->sk_attno, + giststate->tupdesc, + &isNull); - if (ItemPointerEquals(&(ituple->t_tid), iptr)) + if (key->sk_flags & SK_ISNULL) + { + /* + * On non-leaf page we can't conclude that child hasn't NULL + * values because of assumption in GiST: union (VAL, NULL) is VAL. + * But if on non-leaf page key IS NULL, then all children are + * NULL. + */ + if (key->sk_flags & SK_SEARCHNULL) { - /* found */ - ItemIdMarkDead(PageGetItemId(p, offset)); - SetBufferCommitInfoNeedsSave(so->curbuf); - break; + if (GistPageIsLeaf(page) && !isNull) + return false; + } + else + { + Assert(key->sk_flags & SK_SEARCHNOTNULL); + if (isNull) + return false; } } - } + else if (isNull) + { + return false; + } + else + { + Datum test; + bool recheck; + GISTENTRY de; - LockBuffer(so->curbuf, GIST_UNLOCK); -} + gistdentryinit(giststate, key->sk_attno - 1, &de, + datum, r, page, offset, + FALSE, isNull); -/* - * gistgettuple() -- Get the next tuple in the scan - */ -Datum -gistgettuple(PG_FUNCTION_ARGS) -{ - IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); - ScanDirection dir = (ScanDirection) PG_GETARG_INT32(1); - GISTScanOpaque so; - bool res; + /* + * Call the Consistent function to evaluate the test. The + * arguments are the index datum (as a GISTENTRY*), the comparison + * datum, the comparison operator's strategy number and subtype + * from pg_amop, and the recheck flag. + * + * (Presently there's no need to pass the subtype since it'll + * always be zero, but might as well pass it for possible future + * use.) + * + * We initialize the recheck flag to true (the safest assumption) + * in case the Consistent function forgets to set it. + */ + recheck = true; - so = (GISTScanOpaque) scan->opaque; + test = FunctionCall5(&key->sk_func, + PointerGetDatum(&de), + key->sk_argument, + Int32GetDatum(key->sk_strategy), + ObjectIdGetDatum(key->sk_subtype), + PointerGetDatum(&recheck)); - if (dir != ForwardScanDirection) - elog(ERROR, "GiST doesn't support other scan directions than forward"); + if (!DatumGetBool(test)) + return false; + *recheck_p |= recheck; + } - /* - * If we have produced an index tuple in the past and the executor has - * informed us we need to mark it as "killed", do so now. - */ - if (scan->kill_prior_tuple && ItemPointerIsValid(&(so->curpos))) - killtuple(scan->indexRelation, so, &(so->curpos)); + key++; + keySize--; + } - /* - * Get the next tuple that matches the search key. - */ - res = (gistnext(scan, NULL) > 0); + /* OK, it passes --- now let's compute the distances */ + key = scan->orderByData; + distance_p = so->distances; + keySize = scan->numberOfOrderBys; + while (keySize > 0) + { + Datum datum; + bool isNull; - PG_RETURN_BOOL(res); -} + datum = index_getattr(tuple, + key->sk_attno, + giststate->tupdesc, + &isNull); -Datum -gistgetbitmap(PG_FUNCTION_ARGS) -{ - IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); - TIDBitmap *tbm = (TIDBitmap *) PG_GETARG_POINTER(1); - int64 ntids; + if ((key->sk_flags & SK_ISNULL) || isNull) + { + /* Assume distance computes as null and sorts to the end */ + *distance_p = get_float8_infinity(); + } + else + { + Datum dist; + GISTENTRY de; - ntids = gistnext(scan, tbm); + gistdentryinit(giststate, key->sk_attno - 1, &de, + datum, r, page, offset, + FALSE, isNull); - PG_RETURN_INT64(ntids); + /* + * Call the Distance function to evaluate the distance. The + * arguments are the index datum (as a GISTENTRY*), the comparison + * datum, and the ordering operator's strategy number and subtype + * from pg_amop. + * + * (Presently there's no need to pass the subtype since it'll + * always be zero, but might as well pass it for possible future + * use.) + * + * Note that Distance functions don't get a recheck argument. + * We can't tolerate lossy distance calculations on leaf tuples; + * there is no opportunity to re-sort the tuples afterwards. + */ + dist = FunctionCall4(&key->sk_func, + PointerGetDatum(&de), + key->sk_argument, + Int32GetDatum(key->sk_strategy), + ObjectIdGetDatum(key->sk_subtype)); + + *distance_p = DatumGetFloat8(dist); + } + + key++; + distance_p++; + keySize--; + } + + return true; } /* - * Fetch tuple(s) that match the search key; this can be invoked - * either to fetch the first such tuple or subsequent matching tuples. + * Scan all items on the GiST index page identified by *pageItem, and insert + * them into the queue (or directly to output areas) * - * This function is used by both gistgettuple and gistgetbitmap. When - * invoked from gistgettuple, tbm is null and the next matching tuple - * is returned in scan->xs_ctup.t_self. When invoked from getbitmap, - * tbm is non-null and all matching tuples are added to tbm before - * returning. In both cases, the function result is the number of - * returned tuples. + * scan: index scan we are executing + * pageItem: search queue item identifying an index page to scan + * myDistances: distances array associated with pageItem, or NULL at the root + * tbm: if not NULL, gistgetbitmap's output bitmap + * ntids: if not NULL, gistgetbitmap's output tuple counter * - * If scan specifies to skip killed tuples, continue looping until we find a - * non-killed tuple that matches the search key. + * If tbm/ntids aren't NULL, we are doing an amgetbitmap scan, and heap + * tuples should be reported directly into the bitmap. If they are NULL, + * we're doing a plain or ordered indexscan. For a plain indexscan, heap + * tuple TIDs are returned into so->pageData[]. For an ordered indexscan, + * heap tuple TIDs are pushed into individual search queue items. + * + * If we detect that the index page has split since we saw its downlink + * in the parent, we push its new right sibling onto the queue so the + * sibling will be processed next. */ -static int64 -gistnext(IndexScanDesc scan, TIDBitmap *tbm) +static void +gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, double *myDistances, + TIDBitmap *tbm, int64 *ntids) { - Page p; - OffsetNumber n; - GISTScanOpaque so; - GISTSearchStack *stk; - IndexTuple it; + GISTScanOpaque so = (GISTScanOpaque) scan->opaque; + Buffer buffer; + Page page; GISTPageOpaque opaque; - int64 ntids = 0; + OffsetNumber maxoff; + OffsetNumber i; + GISTSearchTreeItem *tmpItem = so->tmpTreeItem; + bool isNew; + MemoryContext oldcxt; - so = (GISTScanOpaque) scan->opaque; + Assert(!GISTSearchItemIsHeap(*pageItem)); - if (so->qual_ok == false) - return 0; + buffer = ReadBuffer(scan->indexRelation, pageItem->blkno); + LockBuffer(buffer, GIST_SHARE); + gistcheckpage(scan->indexRelation, buffer); + page = BufferGetPage(buffer); + opaque = GistPageGetOpaque(page); - if (so->curbuf == InvalidBuffer) + /* check if page split occurred since visit to parent */ + if (!XLogRecPtrIsInvalid(pageItem->data.parentlsn) && + XLByteLT(pageItem->data.parentlsn, opaque->nsn) && + opaque->rightlink != InvalidBlockNumber /* sanity check */ ) { - if (ItemPointerIsValid(&so->curpos) == false) - { - /* Being asked to fetch the first entry, so start at the root */ - Assert(so->curbuf == InvalidBuffer); - Assert(so->stack == NULL); + /* There was a page split, follow right link to add pages */ + GISTSearchItem *item; - so->curbuf = ReadBuffer(scan->indexRelation, GIST_ROOT_BLKNO); + /* This can't happen when starting at the root */ + Assert(myDistances != NULL); - stk = so->stack = (GISTSearchStack *) palloc0(sizeof(GISTSearchStack)); + oldcxt = MemoryContextSwitchTo(so->queueCxt); - stk->next = NULL; - stk->block = GIST_ROOT_BLKNO; + /* Create new GISTSearchItem for the right sibling index page */ + item = palloc(sizeof(GISTSearchItem)); + item->next = NULL; + item->blkno = opaque->rightlink; + item->data.parentlsn = pageItem->data.parentlsn; - pgstat_count_index_scan(scan->indexRelation); - } - else - { - /* scan is finished */ - return 0; - } + /* Insert it into the queue using same distances as for this page */ + tmpItem->head = item; + tmpItem->lastHeap = NULL; + memcpy(tmpItem->distances, myDistances, + sizeof(double) * scan->numberOfOrderBys); + + (void) rb_insert(so->queue, (RBNode *) tmpItem, &isNew); + + MemoryContextSwitchTo(oldcxt); } + so->nPageData = so->curPageData = 0; + /* - * check stored pointers from last visit + * check all tuples on page */ - if (so->nPageData > 0) + maxoff = PageGetMaxOffsetNumber(page); + for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i)) { + IndexTuple it = (IndexTuple) PageGetItem(page, PageGetItemId(page, i)); + bool match; + bool recheck; + /* - * gistgetmulti never should go here + * Must call gistindex_keytest in tempCxt, and clean up any leftover + * junk afterward. */ - Assert(tbm == NULL); + oldcxt = MemoryContextSwitchTo(so->tempCxt); - if (so->curPageData < so->nPageData) - { - scan->xs_ctup.t_self = so->pageData[so->curPageData].heapPtr; - scan->xs_recheck = so->pageData[so->curPageData].recheck; + match = gistindex_keytest(scan, it, page, i, &recheck); - ItemPointerSet(&so->curpos, - BufferGetBlockNumber(so->curbuf), - so->pageData[so->curPageData].pageOffset); + MemoryContextSwitchTo(oldcxt); + MemoryContextReset(so->tempCxt); - so->curPageData++; + /* Ignore tuple if it doesn't match */ + if (!match) + continue; - return 1; + if (tbm && GistPageIsLeaf(page)) + { + /* + * getbitmap scan, so just push heap tuple TIDs into the bitmap + * without worrying about ordering + */ + tbm_add_tuples(tbm, &it->t_tid, 1, recheck); + (*ntids)++; + } + else if (scan->numberOfOrderBys == 0 && GistPageIsLeaf(page)) + { + /* + * Non-ordered scan, so report heap tuples in so->pageData[] + */ + so->pageData[so->nPageData].heapPtr = it->t_tid; + so->pageData[so->nPageData].recheck = recheck; + so->nPageData++; } else { /* - * Go to the next page + * Must push item into search queue. We get here for any lower + * index page, and also for heap tuples if doing an ordered + * search. */ - stk = so->stack->next; - pfree(so->stack); - so->stack = stk; + GISTSearchItem *item; + + oldcxt = MemoryContextSwitchTo(so->queueCxt); - /* If we're out of stack entries, we're done */ - if (so->stack == NULL) + /* Create new GISTSearchItem for this item */ + item = palloc(sizeof(GISTSearchItem)); + item->next = NULL; + + if (GistPageIsLeaf(page)) { - ReleaseBuffer(so->curbuf); - so->curbuf = InvalidBuffer; - return 0; + /* Creating heap-tuple GISTSearchItem */ + item->blkno = InvalidBlockNumber; + item->data.heap.heapPtr = it->t_tid; + item->data.heap.recheck = recheck; } + else + { + /* Creating index-page GISTSearchItem */ + item->blkno = ItemPointerGetBlockNumber(&it->t_tid); + /* lsn of current page is lsn of parent page for child */ + item->data.parentlsn = PageGetLSN(page); + } + + /* Insert it into the queue using new distance data */ + tmpItem->head = item; + tmpItem->lastHeap = GISTSearchItemIsHeap(*item) ? item : NULL; + memcpy(tmpItem->distances, so->distances, + sizeof(double) * scan->numberOfOrderBys); - so->curbuf = ReleaseAndReadBuffer(so->curbuf, - scan->indexRelation, - stk->block); + (void) rb_insert(so->queue, (RBNode *) tmpItem, &isNew); + + MemoryContextSwitchTo(oldcxt); } } + UnlockReleaseBuffer(buffer); +} + +/* + * Extract next item (in order) from search queue + * + * Returns a GISTSearchItem or NULL. Caller must pfree item when done with it. + * + * NOTE: on successful return, so->curTreeItem is the GISTSearchTreeItem that + * contained the result item. Callers can use so->curTreeItem->distances as + * the distances value for the item. + */ +static GISTSearchItem * +getNextGISTSearchItem(GISTScanOpaque so) +{ for (;;) { - CHECK_FOR_INTERRUPTS(); + GISTSearchItem *item; - /* First of all, we need lock buffer */ - Assert(so->curbuf != InvalidBuffer); - LockBuffer(so->curbuf, GIST_SHARE); - gistcheckpage(scan->indexRelation, so->curbuf); - p = BufferGetPage(so->curbuf); - opaque = GistPageGetOpaque(p); - - /* remember lsn to identify page changed for tuple's killing */ - so->stack->lsn = PageGetLSN(p); - - /* check page split, occured since visit to parent */ - if (!XLogRecPtrIsInvalid(so->stack->parentlsn) && - XLByteLT(so->stack->parentlsn, opaque->nsn) && - opaque->rightlink != InvalidBlockNumber /* sanity check */ && - (so->stack->next == NULL || so->stack->next->block != opaque->rightlink) /* check if already - added */ ) + /* Update curTreeItem if we don't have one */ + if (so->curTreeItem == NULL) { - /* detect page split, follow right link to add pages */ - - stk = (GISTSearchStack *) palloc(sizeof(GISTSearchStack)); - stk->next = so->stack->next; - stk->block = opaque->rightlink; - stk->parentlsn = so->stack->parentlsn; - memset(&(stk->lsn), 0, sizeof(GistNSN)); - so->stack->next = stk; + so->curTreeItem = (GISTSearchTreeItem *) rb_leftmost(so->queue); + /* Done when tree is empty */ + if (so->curTreeItem == NULL) + break; } - /* if page is empty, then just skip it */ - if (PageIsEmpty(p)) + item = so->curTreeItem->head; + if (item != NULL) { - LockBuffer(so->curbuf, GIST_UNLOCK); - stk = so->stack->next; - pfree(so->stack); - so->stack = stk; - - if (so->stack == NULL) - { - ReleaseBuffer(so->curbuf); - so->curbuf = InvalidBuffer; - return ntids; - } - - so->curbuf = ReleaseAndReadBuffer(so->curbuf, scan->indexRelation, - stk->block); - continue; + /* Delink item from chain */ + so->curTreeItem->head = item->next; + /* Return item; caller is responsible to pfree it */ + return item; } - n = FirstOffsetNumber; - - /* wonderful, we can look at page */ - so->nPageData = so->curPageData = 0; - - for (;;) - { - n = gistfindnext(scan, n); - - if (!OffsetNumberIsValid(n)) - { - /* - * If we was called from gistgettuple and current buffer - * contains something matched then make a recursive call - it - * will return ItemPointer from so->pageData. But we save - * buffer pinned to support tuple's killing - */ - if (!tbm && so->nPageData > 0) - { - LockBuffer(so->curbuf, GIST_UNLOCK); - return gistnext(scan, NULL); - } + /* curTreeItem is exhausted, so remove it from rbtree */ + rb_delete(so->queue, (RBNode *) so->curTreeItem); + so->curTreeItem = NULL; + } - /* - * We ran out of matching index entries on the current page, - * so pop the top stack entry and use it to continue the - * search. - */ - LockBuffer(so->curbuf, GIST_UNLOCK); - stk = so->stack->next; - pfree(so->stack); - so->stack = stk; - - /* If we're out of stack entries, we're done */ - - if (so->stack == NULL) - { - ReleaseBuffer(so->curbuf); - so->curbuf = InvalidBuffer; - return ntids; - } - - so->curbuf = ReleaseAndReadBuffer(so->curbuf, - scan->indexRelation, - stk->block); - /* XXX go up */ - break; - } + return NULL; +} - if (GistPageIsLeaf(p)) - { - /* - * We've found a matching index entry in a leaf page, so - * return success. Note that we keep "curbuf" pinned so that - * we can efficiently resume the index scan later. - */ +/* + * Fetch next heap tuple in an ordered search + */ +static bool +getNextNearest(IndexScanDesc scan) +{ + GISTScanOpaque so = (GISTScanOpaque) scan->opaque; + bool res = false; - if (!(scan->ignore_killed_tuples && - ItemIdIsDead(PageGetItemId(p, n)))) - { - it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n)); - ntids++; - if (tbm != NULL) - tbm_add_tuples(tbm, &it->t_tid, 1, scan->xs_recheck); - else - { - so->pageData[so->nPageData].heapPtr = it->t_tid; - so->pageData[so->nPageData].pageOffset = n; - so->pageData[so->nPageData].recheck = scan->xs_recheck; - so->nPageData++; - } - } - } - else - { - /* - * We've found an entry in an internal node whose key is - * consistent with the search key, so push it to stack - */ - stk = (GISTSearchStack *) palloc(sizeof(GISTSearchStack)); + do + { + GISTSearchItem *item = getNextGISTSearchItem(so); - it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n)); - stk->block = ItemPointerGetBlockNumber(&(it->t_tid)); - memset(&(stk->lsn), 0, sizeof(GistNSN)); - stk->parentlsn = so->stack->lsn; + if (!item) + break; - stk->next = so->stack->next; - so->stack->next = stk; - } + if (GISTSearchItemIsHeap(*item)) + { + /* found a heap item at currently minimal distance */ + scan->xs_ctup.t_self = item->data.heap.heapPtr; + scan->xs_recheck = item->data.heap.recheck; + res = true; + } + else + { + /* visit an index page, extract its items into queue */ + CHECK_FOR_INTERRUPTS(); - n = OffsetNumberNext(n); + gistScanPage(scan, item, so->curTreeItem->distances, NULL, NULL); } - } - return ntids; + pfree(item); + } while (!res); + + return res; } /* - * gistindex_keytest() -- does this index tuple satisfy the scan key(s)? - * - * On success return for a leaf tuple, scan->xs_recheck is set to indicate - * whether recheck is needed. We recheck if any of the consistent() functions - * request it. - * - * We must decompress the key in the IndexTuple before passing it to the - * sk_func (and we have previously overwritten the sk_func to use the - * user-defined Consistent method, so we actually are invoking that). - * - * Note that this function is always invoked in a short-lived memory context, - * so we don't need to worry about cleaning up allocated memory, either here - * or in the implementation of any Consistent methods. + * gistgettuple() -- Get the next tuple in the scan */ -static bool -gistindex_keytest(IndexTuple tuple, - IndexScanDesc scan, - OffsetNumber offset) +Datum +gistgettuple(PG_FUNCTION_ARGS) { - int keySize = scan->numberOfKeys; - ScanKey key = scan->keyData; - Relation r = scan->indexRelation; - GISTScanOpaque so; - Page p; - GISTSTATE *giststate; + IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); + GISTScanOpaque so = (GISTScanOpaque) scan->opaque; - so = (GISTScanOpaque) scan->opaque; - giststate = so->giststate; - p = BufferGetPage(so->curbuf); + if (!so->qual_ok) + PG_RETURN_BOOL(false); - scan->xs_recheck = false; + if (so->firstCall) + { + /* Begin the scan by processing the root page */ + GISTSearchItem fakeItem; - /* - * Tuple doesn't restore after crash recovery because of incomplete insert - */ - if (!GistPageIsLeaf(p) && GistTupleIsInvalid(tuple)) - return true; + pgstat_count_index_scan(scan->indexRelation); - while (keySize > 0) - { - Datum datum; - bool isNull; - Datum test; - bool recheck; - GISTENTRY de; + so->firstCall = false; + so->curTreeItem = NULL; + so->curPageData = so->nPageData = 0; - datum = index_getattr(tuple, - key->sk_attno, - giststate->tupdesc, - &isNull); + fakeItem.blkno = GIST_ROOT_BLKNO; + memset(&fakeItem.data.parentlsn, 0, sizeof(GistNSN)); + gistScanPage(scan, &fakeItem, NULL, NULL, NULL); + } - if (key->sk_flags & SK_ISNULL) + if (scan->numberOfOrderBys > 0) + { + /* Must fetch tuples in strict distance order */ + PG_RETURN_BOOL(getNextNearest(scan)); + } + else + { + /* Fetch tuples index-page-at-a-time */ + for (;;) { - /* - * On non-leaf page we can't conclude that child hasn't NULL - * values because of assumption in GiST: union (VAL, NULL) is VAL. - * But if on non-leaf page key IS NULL, then all children are - * NULL. - */ - if (key->sk_flags & SK_SEARCHNULL) + if (so->curPageData < so->nPageData) { - if (GistPageIsLeaf(p) && !isNull) - return false; + /* continuing to return tuples from a leaf page */ + scan->xs_ctup.t_self = so->pageData[so->curPageData].heapPtr; + scan->xs_recheck = so->pageData[so->curPageData].recheck; + so->curPageData++; + PG_RETURN_BOOL(true); } - else + + /* find and process the next index page */ + do { - Assert(key->sk_flags & SK_SEARCHNOTNULL); - if (isNull) - return false; - } - } - else if (isNull) - { - return false; - } - else - { - gistdentryinit(giststate, key->sk_attno - 1, &de, - datum, r, p, offset, - FALSE, isNull); + GISTSearchItem *item = getNextGISTSearchItem(so); - /* - * Call the Consistent function to evaluate the test. The - * arguments are the index datum (as a GISTENTRY*), the comparison - * datum, the comparison operator's strategy number and subtype - * from pg_amop, and the recheck flag. - * - * (Presently there's no need to pass the subtype since it'll - * always be zero, but might as well pass it for possible future - * use.) - * - * We initialize the recheck flag to true (the safest assumption) - * in case the Consistent function forgets to set it. - */ - recheck = true; + if (!item) + PG_RETURN_BOOL(false); - test = FunctionCall5(&key->sk_func, - PointerGetDatum(&de), - key->sk_argument, - Int32GetDatum(key->sk_strategy), - ObjectIdGetDatum(key->sk_subtype), - PointerGetDatum(&recheck)); + CHECK_FOR_INTERRUPTS(); - if (!DatumGetBool(test)) - return false; - scan->xs_recheck |= recheck; - } + /* + * While scanning a leaf page, ItemPointers of matching heap + * tuples are stored in so->pageData. If there are any on + * this page, we fall out of the inner "do" and loop around + * to return them. + */ + gistScanPage(scan, item, so->curTreeItem->distances, NULL, NULL); - keySize--; - key++; + pfree(item); + } while (so->nPageData == 0); + } } - return true; + PG_RETURN_BOOL(false); /* keep compiler quiet */ } /* - * Return the offset of the first index entry that is consistent with - * the search key after offset 'n' in the current page. If there are - * no more consistent entries, return InvalidOffsetNumber. - * On success, scan->xs_recheck is set correctly, too. - * Page should be locked.... + * gistgetbitmap() -- Get a bitmap of all heap tuple locations */ -static OffsetNumber -gistfindnext(IndexScanDesc scan, OffsetNumber n) +Datum +gistgetbitmap(PG_FUNCTION_ARGS) { - OffsetNumber maxoff; - IndexTuple it; - GISTScanOpaque so; - MemoryContext oldcxt; - Page p; + IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); + TIDBitmap *tbm = (TIDBitmap *) PG_GETARG_POINTER(1); + GISTScanOpaque so = (GISTScanOpaque) scan->opaque; + int64 ntids = 0; + GISTSearchItem fakeItem; + + if (!so->qual_ok) + PG_RETURN_INT64(0); + + pgstat_count_index_scan(scan->indexRelation); - so = (GISTScanOpaque) scan->opaque; - p = BufferGetPage(so->curbuf); - maxoff = PageGetMaxOffsetNumber(p); + /* Begin the scan by processing the root page */ + so->curTreeItem = NULL; + so->curPageData = so->nPageData = 0; + + fakeItem.blkno = GIST_ROOT_BLKNO; + memset(&fakeItem.data.parentlsn, 0, sizeof(GistNSN)); + gistScanPage(scan, &fakeItem, NULL, tbm, &ntids); /* - * Make sure we're in a short-lived memory context when we invoke a - * user-supplied GiST method in gistindex_keytest(), so we don't leak - * memory + * While scanning a leaf page, ItemPointers of matching heap tuples will + * be stored directly into tbm, so we don't need to deal with them here. */ - oldcxt = MemoryContextSwitchTo(so->tempCxt); - - while (n >= FirstOffsetNumber && n <= maxoff) + for (;;) { - it = (IndexTuple) PageGetItem(p, PageGetItemId(p, n)); - if (gistindex_keytest(it, scan, n)) + GISTSearchItem *item = getNextGISTSearchItem(so); + + if (!item) break; - n = OffsetNumberNext(n); - } + CHECK_FOR_INTERRUPTS(); - MemoryContextSwitchTo(oldcxt); - MemoryContextReset(so->tempCxt); + gistScanPage(scan, item, so->curTreeItem->distances, tbm, &ntids); - /* - * If we found a matching entry, return its offset; otherwise return - * InvalidOffsetNumber to inform the caller to go to the next page. - */ - if (n >= FirstOffsetNumber && n <= maxoff) - return n; - else - return InvalidOffsetNumber; + pfree(item); + } + + PG_RETURN_INT64(ntids); } diff --git a/src/backend/access/gist/gistproc.c b/src/backend/access/gist/gistproc.c index 681ffd27d4..bdc84befc6 100644 --- a/src/backend/access/gist/gistproc.c +++ b/src/backend/access/gist/gistproc.c @@ -904,6 +904,76 @@ gist_point_compress(PG_FUNCTION_ARGS) PG_RETURN_POINTER(entry); } +#define point_point_distance(p1,p2) \ + DatumGetFloat8(DirectFunctionCall2(point_distance, \ + PointPGetDatum(p1), PointPGetDatum(p2))) + +static double +computeDistance(bool isLeaf, BOX *box, Point *point) +{ + double result = 0.0; + + if (isLeaf) + { + /* simple point to point distance */ + result = point_point_distance(point, &box->low); + } + else if (point->x <= box->high.x && point->x >= box->low.x && + point->y <= box->high.y && point->y >= box->low.y) + { + /* point inside the box */ + result = 0.0; + } + else if (point->x <= box->high.x && point->x >= box->low.x) + { + /* point is over or below box */ + Assert(box->low.y <= box->high.y); + if (point->y > box->high.y) + result = point->y - box->high.y; + else if (point->y < box->low.y) + result = box->low.y - point->y; + else + elog(ERROR, "inconsistent point values"); + } + else if (point->y <= box->high.y && point->y >= box->low.y) + { + /* point is to left or right of box */ + Assert(box->low.x <= box->high.x); + if (point->x > box->high.x) + result = point->x - box->high.x; + else if (point->x < box->low.x) + result = box->low.x - point->x; + else + elog(ERROR, "inconsistent point values"); + } + else + { + /* closest point will be a vertex */ + Point p; + double subresult; + + result = point_point_distance(point, &box->low); + + subresult = point_point_distance(point, &box->high); + if (result > subresult) + result = subresult; + + p.x = box->low.x; + p.y = box->high.y; + subresult = point_point_distance(point, &p); + if (result > subresult) + result = subresult; + + p.x = box->high.x; + p.y = box->low.y; + subresult = point_point_distance(point, &p); + if (result > subresult) + result = subresult; + } + + return result; +} + static bool gist_point_consistent_internal(StrategyNumber strategy, bool isLeaf, BOX *key, Point *query) @@ -954,8 +1024,8 @@ gist_point_consistent(PG_FUNCTION_ARGS) { GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2); - bool result; bool *recheck = (bool *) PG_GETARG_POINTER(4); + bool result; StrategyNumber strategyGroup = strategy / GeoStrategyNumberOffset; switch (strategyGroup) @@ -1034,9 +1104,32 @@ gist_point_consistent(PG_FUNCTION_ARGS) } break; default: - result = false; /* silence compiler warning */ elog(ERROR, "unknown strategy number: %d", strategy); + result = false; /* keep compiler quiet */ } PG_RETURN_BOOL(result); } + +Datum +gist_point_distance(PG_FUNCTION_ARGS) +{ + GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0); + StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2); + double distance; + StrategyNumber strategyGroup = strategy / GeoStrategyNumberOffset; + + switch (strategyGroup) + { + case PointStrategyNumberGroup: + distance = computeDistance(GIST_LEAF(entry), + DatumGetBoxP(entry->key), + PG_GETARG_POINT_P(1)); + break; + default: + elog(ERROR, "unknown strategy number: %d", strategy); + distance = 0.0; /* keep compiler quiet */ + } + + PG_RETURN_FLOAT8(distance); +} diff --git a/src/backend/access/gist/gistscan.c b/src/backend/access/gist/gistscan.c index 106714511a..546880c01e 100644 --- a/src/backend/access/gist/gistscan.c +++ b/src/backend/access/gist/gistscan.c @@ -20,8 +20,84 @@ #include "access/relscan.h" #include "storage/bufmgr.h" #include "utils/memutils.h" +#include "utils/rel.h" -static void gistfreestack(GISTSearchStack *s); + +/* + * RBTree support functions for the GISTSearchTreeItem queue + */ + +static int +GISTSearchTreeItemComparator(const RBNode *a, const RBNode *b, void *arg) +{ + const GISTSearchTreeItem *sa = (const GISTSearchTreeItem *) a; + const GISTSearchTreeItem *sb = (const GISTSearchTreeItem *) b; + IndexScanDesc scan = (IndexScanDesc) arg; + int i; + + /* Order according to distance comparison */ + for (i = 0; i < scan->numberOfOrderBys; i++) + { + if (sa->distances[i] != sb->distances[i]) + return (sa->distances[i] > sb->distances[i]) ? 1 : -1; + } + + return 0; +} + +static void +GISTSearchTreeItemCombiner(RBNode *existing, const RBNode *newrb, void *arg) +{ + GISTSearchTreeItem *scurrent = (GISTSearchTreeItem *) existing; + const GISTSearchTreeItem *snew = (const GISTSearchTreeItem *) newrb; + GISTSearchItem *newitem = snew->head; + + /* snew should have just one item in its chain */ + Assert(newitem && newitem->next == NULL); + + /* + * If new item is heap tuple, it goes to front of chain; otherwise insert + * it before the first index-page item, so that index pages are visited + * in LIFO order, ensuring depth-first search of index pages. See + * comments in gist_private.h. + */ + if (GISTSearchItemIsHeap(*newitem)) + { + newitem->next = scurrent->head; + scurrent->head = newitem; + if (scurrent->lastHeap == NULL) + scurrent->lastHeap = newitem; + } + else if (scurrent->lastHeap == NULL) + { + newitem->next = scurrent->head; + scurrent->head = newitem; + } + else + { + newitem->next = scurrent->lastHeap->next; + scurrent->lastHeap->next = newitem; + } +} + +static RBNode * +GISTSearchTreeItemAllocator(void *arg) +{ + IndexScanDesc scan = (IndexScanDesc) arg; + + return palloc(GSTIHDRSZ + sizeof(double) * scan->numberOfOrderBys); +} + +static void +GISTSearchTreeItemDeleter(RBNode *rb, void *arg) +{ + pfree(rb); +} + + +/* + * Index AM API functions for scanning GiST indexes + */ Datum gistbeginscan(PG_FUNCTION_ARGS) @@ -32,18 +108,22 @@ gistbeginscan(PG_FUNCTION_ARGS) IndexScanDesc scan; GISTScanOpaque so; - /* no order by operators allowed */ - Assert(norderbys == 0); - scan = RelationGetIndexScan(r, nkeys, norderbys); /* initialize opaque data */ - so = (GISTScanOpaque) palloc(sizeof(GISTScanOpaqueData)); - so->stack = NULL; + so = (GISTScanOpaque) palloc0(sizeof(GISTScanOpaqueData)); + so->queueCxt = AllocSetContextCreate(CurrentMemoryContext, + "GiST queue context", + ALLOCSET_DEFAULT_MINSIZE, + ALLOCSET_DEFAULT_INITSIZE, + ALLOCSET_DEFAULT_MAXSIZE); so->tempCxt = createTempGistContext(); - so->curbuf = InvalidBuffer; so->giststate = (GISTSTATE *) palloc(sizeof(GISTSTATE)); initGISTstate(so->giststate, scan->indexRelation); + /* workspaces with size dependent on numberOfOrderBys: */ + so->tmpTreeItem = palloc(GSTIHDRSZ + sizeof(double) * scan->numberOfOrderBys); + so->distances = palloc(sizeof(double) * scan->numberOfOrderBys); + so->qual_ok = true; /* in case there are zero keys */ scan->opaque = so; @@ -55,27 +135,27 @@ gistrescan(PG_FUNCTION_ARGS) { IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); ScanKey key = (ScanKey) PG_GETARG_POINTER(1); - /* remaining arguments are ignored */ + ScanKey orderbys = (ScanKey) PG_GETARG_POINTER(3); + /* nkeys and norderbys arguments are ignored */ GISTScanOpaque so = (GISTScanOpaque) scan->opaque; int i; + MemoryContext oldCxt; /* rescan an existing indexscan --- reset state */ - gistfreestack(so->stack); - so->stack = NULL; - /* drop pins on buffers -- no locks held */ - if (BufferIsValid(so->curbuf)) - { - ReleaseBuffer(so->curbuf); - so->curbuf = InvalidBuffer; - } + MemoryContextReset(so->queueCxt); + so->curTreeItem = NULL; - /* - * Clear all the pointers. - */ - ItemPointerSetInvalid(&so->curpos); - so->nPageData = so->curPageData = 0; + /* create new, empty RBTree for search queue */ + oldCxt = MemoryContextSwitchTo(so->queueCxt); + so->queue = rb_create(GSTIHDRSZ + sizeof(double) * scan->numberOfOrderBys, + GISTSearchTreeItemComparator, + GISTSearchTreeItemCombiner, + GISTSearchTreeItemAllocator, + GISTSearchTreeItemDeleter, + scan); + MemoryContextSwitchTo(oldCxt); - so->qual_ok = true; + so->firstCall = true; /* Update scan key, if a new one is given */ if (key && scan->numberOfKeys > 0) @@ -84,7 +164,7 @@ gistrescan(PG_FUNCTION_ARGS) scan->numberOfKeys * sizeof(ScanKeyData)); /* - * Modify the scan key so that all the Consistent method is called for + * Modify the scan key so that the Consistent method is called for * all comparisons. The original operator is passed to the Consistent * function in the form of its strategy number, which is available * from the sk_strategy field, and its subtype from the sk_subtype @@ -94,9 +174,11 @@ gistrescan(PG_FUNCTION_ARGS) * SK_SEARCHNULL/SK_SEARCHNOTNULL then nothing can be found (ie, we * assume all indexable operators are strict). */ + so->qual_ok = true; + for (i = 0; i < scan->numberOfKeys; i++) { - ScanKey skey = &(scan->keyData[i]); + ScanKey skey = scan->keyData + i; skey->sk_func = so->giststate->consistentFn[skey->sk_attno - 1]; @@ -108,6 +190,33 @@ gistrescan(PG_FUNCTION_ARGS) } } + /* Update order-by key, if a new one is given */ + if (orderbys && scan->numberOfOrderBys > 0) + { + memmove(scan->orderByData, orderbys, + scan->numberOfOrderBys * sizeof(ScanKeyData)); + + /* + * Modify the order-by key so that the Distance method is called for + * all comparisons. The original operator is passed to the Distance + * function in the form of its strategy number, which is available + * from the sk_strategy field, and its subtype from the sk_subtype + * field. + */ + for (i = 0; i < scan->numberOfOrderBys; i++) + { + ScanKey skey = scan->orderByData + i; + + skey->sk_func = so->giststate->distanceFn[skey->sk_attno - 1]; + + /* Check we actually have a distance function ... */ + if (!OidIsValid(skey->sk_func.fn_oid)) + elog(ERROR, "missing support function %d for attribute %d of index \"%s\"", + GIST_DISTANCE_PROC, skey->sk_attno, + RelationGetRelationName(scan->indexRelation)); + } + } + PG_RETURN_VOID(); } @@ -131,26 +240,12 @@ gistendscan(PG_FUNCTION_ARGS) IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); GISTScanOpaque so = (GISTScanOpaque) scan->opaque; - gistfreestack(so->stack); - if (so->giststate != NULL) - freeGISTstate(so->giststate); - /* drop pins on buffers -- we aren't holding any locks */ - if (BufferIsValid(so->curbuf)) - ReleaseBuffer(so->curbuf); + freeGISTstate(so->giststate); + MemoryContextDelete(so->queueCxt); MemoryContextDelete(so->tempCxt); + pfree(so->tmpTreeItem); + pfree(so->distances); pfree(so); PG_RETURN_VOID(); } - -static void -gistfreestack(GISTSearchStack *s) -{ - while (s != NULL) - { - GISTSearchStack *p = s->next; - - pfree(s); - s = p; - } -} diff --git a/src/include/access/gist.h b/src/include/access/gist.h index 0d9449b991..01fddb94af 100644 --- a/src/include/access/gist.h +++ b/src/include/access/gist.h @@ -32,7 +32,8 @@ #define GIST_PENALTY_PROC 5 #define GIST_PICKSPLIT_PROC 6 #define GIST_EQUAL_PROC 7 -#define GISTNProcs 7 +#define GIST_DISTANCE_PROC 8 +#define GISTNProcs 8 /* * strategy numbers for GiST opclasses that want to implement the old @@ -52,6 +53,7 @@ #define RTOverAboveStrategyNumber 12 #define RTOldContainsStrategyNumber 13 /* for old spelling of @> */ #define RTOldContainedByStrategyNumber 14 /* for old spelling of <@ */ +#define RTKNNSearchStrategyNumber 15 /* * Page opaque data in a GiST index page. @@ -131,13 +133,13 @@ typedef struct GISTENTRY #define GistClearTuplesDeleted(page) ( GistPageGetOpaque(page)->flags &= ~F_TUPLES_DELETED) /* - * Vector of GISTENTRY structs; user-defined methods union and pick - * split takes it as one of their arguments + * Vector of GISTENTRY structs; user-defined methods union and picksplit + * take it as one of their arguments */ typedef struct { int32 n; /* number of elements */ - GISTENTRY vector[1]; + GISTENTRY vector[1]; /* variable-length array */ } GistEntryVector; #define GEVHDRSZ (offsetof(GistEntryVector, vector)) diff --git a/src/include/access/gist_private.h b/src/include/access/gist_private.h index f2dcbfb39b..1a0e1ea72e 100644 --- a/src/include/access/gist_private.h +++ b/src/include/access/gist_private.h @@ -17,34 +17,19 @@ #include "access/gist.h" #include "access/itup.h" #include "storage/bufmgr.h" +#include "utils/rbtree.h" -#define GIST_UNLOCK BUFFER_LOCK_UNLOCK +/* Buffer lock modes */ #define GIST_SHARE BUFFER_LOCK_SHARE #define GIST_EXCLUSIVE BUFFER_LOCK_EXCLUSIVE - +#define GIST_UNLOCK BUFFER_LOCK_UNLOCK /* - * XXX old comment!!! - * When we descend a tree, we keep a stack of parent pointers. This - * allows us to follow a chain of internal node points until we reach - * a leaf node, and then back up the stack to re-examine the internal - * nodes. + * GISTSTATE: information needed for any GiST index operation * - * 'parent' is the previous stack entry -- i.e. the node we arrived - * from. 'block' is the node's block number. 'offset' is the offset in - * the node's page that we stopped at (i.e. we followed the child - * pointer located at the specified offset). + * This struct retains call info for the index's opclass-specific support + * functions (per index column), plus the index's tuple descriptor. */ -typedef struct GISTSearchStack -{ - struct GISTSearchStack *next; - BlockNumber block; - /* to identify page changed */ - GistNSN lsn; - /* to recognize split occured */ - GistNSN parentlsn; -} GISTSearchStack; - typedef struct GISTSTATE { FmgrInfo consistentFn[INDEX_MAX_KEYS]; @@ -54,38 +39,96 @@ typedef struct GISTSTATE FmgrInfo penaltyFn[INDEX_MAX_KEYS]; FmgrInfo picksplitFn[INDEX_MAX_KEYS]; FmgrInfo equalFn[INDEX_MAX_KEYS]; + FmgrInfo distanceFn[INDEX_MAX_KEYS]; TupleDesc tupdesc; } GISTSTATE; -typedef struct ItemResult + +/* + * During a GiST index search, we must maintain a queue of unvisited items, + * which can be either individual heap tuples or whole index pages. If it + * is an ordered search, the unvisited items should be visited in distance + * order. Unvisited items at the same distance should be visited in + * depth-first order, that is heap items first, then lower index pages, then + * upper index pages; this rule avoids doing extra work during a search that + * ends early due to LIMIT. + * + * To perform an ordered search, we use an RBTree to manage the distance-order + * queue. Each GISTSearchTreeItem stores all unvisited items of the same + * distance; they are GISTSearchItems chained together via their next fields. + * + * In a non-ordered search (no order-by operators), the RBTree degenerates + * to a single item, which we use as a queue of unvisited index pages only. + * In this case matched heap items from the current index leaf page are + * remembered in GISTScanOpaqueData.pageData[] and returned directly from + * there, instead of building a separate GISTSearchItem for each one. + */ + +/* Individual heap tuple to be visited */ +typedef struct GISTSearchHeapItem { ItemPointerData heapPtr; - OffsetNumber pageOffset; /* offset in index page */ - bool recheck; -} ItemResult; + bool recheck; /* T if quals must be rechecked */ +} GISTSearchHeapItem; + +/* Unvisited item, either index page or heap tuple */ +typedef struct GISTSearchItem +{ + struct GISTSearchItem *next; /* list link */ + BlockNumber blkno; /* index page number, or InvalidBlockNumber */ + union + { + GistNSN parentlsn; /* parent page's LSN, if index page */ + /* we must store parentlsn to detect whether a split occurred */ + GISTSearchHeapItem heap; /* heap info, if heap tuple */ + } data; +} GISTSearchItem; + +#define GISTSearchItemIsHeap(item) ((item).blkno == InvalidBlockNumber) /* - * When we're doing a scan, we need to keep track of the parent stack - * for the marked and current items. + * Within a GISTSearchTreeItem's chain, heap items always appear before + * index-page items, since we want to visit heap items first. lastHeap points + * to the last heap item in the chain, or is NULL if there are none. + */ +typedef struct GISTSearchTreeItem +{ + RBNode rbnode; /* this is an RBTree item */ + GISTSearchItem *head; /* first chain member */ + GISTSearchItem *lastHeap; /* last heap-tuple member, if any */ + double distances[1]; /* array with numberOfOrderBys entries */ +} GISTSearchTreeItem; + +#define GSTIHDRSZ offsetof(GISTSearchTreeItem, distances) + +/* + * GISTScanOpaqueData: private state for a scan of a GiST index */ typedef struct GISTScanOpaqueData { - GISTSearchStack *stack; - GISTSearchStack *markstk; + GISTSTATE *giststate; /* index information, see above */ + RBTree *queue; /* queue of unvisited items */ + MemoryContext queueCxt; /* context holding the queue */ + MemoryContext tempCxt; /* workspace context for calling functions */ bool qual_ok; /* false if qual can never be satisfied */ - GISTSTATE *giststate; - MemoryContext tempCxt; - Buffer curbuf; - ItemPointerData curpos; - - ItemResult pageData[BLCKSZ / sizeof(IndexTupleData)]; - OffsetNumber nPageData; - OffsetNumber curPageData; + bool firstCall; /* true until first gistgettuple call */ + + GISTSearchTreeItem *curTreeItem; /* current queue item, if any */ + + /* pre-allocated workspace arrays */ + GISTSearchTreeItem *tmpTreeItem; /* workspace to pass to rb_insert */ + double *distances; /* workspace for computeKeyTupleDistance */ + + /* In a non-ordered search, returnable heap items are stored here: */ + GISTSearchHeapItem pageData[BLCKSZ / sizeof(IndexTupleData)]; + OffsetNumber nPageData; /* number of valid items in array */ + OffsetNumber curPageData; /* next item to return */ } GISTScanOpaqueData; typedef GISTScanOpaqueData *GISTScanOpaque; + /* XLog stuff */ #define XLOG_GIST_PAGE_UPDATE 0x00 diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h index f28162b439..6c12f7cbf8 100644 --- a/src/include/catalog/catversion.h +++ b/src/include/catalog/catversion.h @@ -53,6 +53,6 @@ */ /* yyyymmddN */ -#define CATALOG_VERSION_NO 201012021 +#define CATALOG_VERSION_NO 201012031 #endif diff --git a/src/include/catalog/pg_am.h b/src/include/catalog/pg_am.h index a729690aff..9425329e94 100644 --- a/src/include/catalog/pg_am.h +++ b/src/include/catalog/pg_am.h @@ -117,7 +117,7 @@ DESCR("b-tree index access method"); DATA(insert OID = 405 ( hash 1 1 f f t f f f f f f f 23 hashinsert hashbeginscan hashgettuple hashgetbitmap hashrescan hashendscan hashmarkpos hashrestrpos hashbuild hashbulkdelete hashvacuumcleanup hashcostestimate hashoptions )); DESCR("hash index access method"); #define HASH_AM_OID 405 -DATA(insert OID = 783 ( gist 0 7 f f f f t t t t t t 0 gistinsert gistbeginscan gistgettuple gistgetbitmap gistrescan gistendscan gistmarkpos gistrestrpos gistbuild gistbulkdelete gistvacuumcleanup gistcostestimate gistoptions )); +DATA(insert OID = 783 ( gist 0 8 f t f f t t t t t t 0 gistinsert gistbeginscan gistgettuple gistgetbitmap gistrescan gistendscan gistmarkpos gistrestrpos gistbuild gistbulkdelete gistvacuumcleanup gistcostestimate gistoptions )); DESCR("GiST index access method"); #define GIST_AM_OID 783 DATA(insert OID = 2742 ( gin 0 5 f f f f t t f f t f 0 gininsert ginbeginscan - gingetbitmap ginrescan ginendscan ginmarkpos ginrestrpos ginbuild ginbulkdelete ginvacuumcleanup gincostestimate ginoptions )); diff --git a/src/include/catalog/pg_amop.h b/src/include/catalog/pg_amop.h index 951f9cd6f2..4e0727e8e7 100644 --- a/src/include/catalog/pg_amop.h +++ b/src/include/catalog/pg_amop.h @@ -604,6 +604,7 @@ DATA(insert ( 1029 600 600 1 s 507 783 0 )); DATA(insert ( 1029 600 600 5 s 508 783 0 )); DATA(insert ( 1029 600 600 10 s 509 783 0 )); DATA(insert ( 1029 600 600 6 s 510 783 0 )); +DATA(insert ( 1029 600 600 15 o 517 783 1970 )); DATA(insert ( 1029 603 600 27 s 433 783 0 )); DATA(insert ( 1029 600 603 28 s 511 783 0 )); DATA(insert ( 1029 604 600 47 s 757 783 0 )); diff --git a/src/include/catalog/pg_amproc.h b/src/include/catalog/pg_amproc.h index f7a0fbe07f..4e2bfbab93 100644 --- a/src/include/catalog/pg_amproc.h +++ b/src/include/catalog/pg_amproc.h @@ -205,6 +205,7 @@ DATA(insert ( 1029 600 600 4 2580 )); DATA(insert ( 1029 600 600 5 2581 )); DATA(insert ( 1029 600 600 6 2582 )); DATA(insert ( 1029 600 600 7 2584 )); +DATA(insert ( 1029 600 600 8 3064 )); /* gin */ diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index 611adef83c..feae22e896 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -4325,7 +4325,9 @@ DATA(insert OID = 2592 ( gist_circle_compress PGNSP PGUID 12 1 0 0 f f f t f i DESCR("GiST support"); DATA(insert OID = 1030 ( gist_point_compress PGNSP PGUID 12 1 0 0 f f f t f i 1 0 2281 "2281" _null_ _null_ _null_ _null_ gist_point_compress _null_ _null_ _null_ )); DESCR("GiST support"); -DATA(insert OID = 2179 ( gist_point_consistent PGNSP PGUID 12 1 0 0 f f f t f i 5 0 16 "2281 603 23 26 2281" _null_ _null_ _null_ _null_ gist_point_consistent _null_ _null_ _null_ )); +DATA(insert OID = 2179 ( gist_point_consistent PGNSP PGUID 12 1 0 0 f f f t f i 5 0 16 "2281 600 23 26 2281" _null_ _null_ _null_ _null_ gist_point_consistent _null_ _null_ _null_ )); +DESCR("GiST support"); +DATA(insert OID = 3064 ( gist_point_distance PGNSP PGUID 12 1 0 0 f f f t f i 4 0 701 "2281 600 23 26" _null_ _null_ _null_ _null_ gist_point_distance _null_ _null_ _null_ )); DESCR("GiST support"); /* GIN */ diff --git a/src/include/utils/geo_decls.h b/src/include/utils/geo_decls.h index d84f369854..24921fe460 100644 --- a/src/include/utils/geo_decls.h +++ b/src/include/utils/geo_decls.h @@ -424,6 +424,7 @@ extern Datum gist_circle_compress(PG_FUNCTION_ARGS); extern Datum gist_circle_consistent(PG_FUNCTION_ARGS); extern Datum gist_point_compress(PG_FUNCTION_ARGS); extern Datum gist_point_consistent(PG_FUNCTION_ARGS); +extern Datum gist_point_distance(PG_FUNCTION_ARGS); /* geo_selfuncs.c */ extern Datum areasel(PG_FUNCTION_ARGS); diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out index 27d5e848e5..1d1470a25d 100644 --- a/src/test/regress/expected/create_index.out +++ b/src/test/regress/expected/create_index.out @@ -51,6 +51,7 @@ CREATE INDEX onek2_stu1_prtl ON onek2 USING btree(stringu1 name_ops) CREATE INDEX grect2ind ON fast_emp4000 USING gist (home_base); CREATE INDEX gpolygonind ON polygon_tbl USING gist (f1); CREATE INDEX gcircleind ON circle_tbl USING gist (f1); +INSERT INTO POINT_TBL(f1) VALUES (NULL); CREATE INDEX gpointind ON point_tbl USING gist (f1); CREATE TEMP TABLE gpolygon_tbl AS SELECT polygon(home_base) AS f1 FROM slow_emp4000; @@ -60,6 +61,7 @@ CREATE TEMP TABLE gcircle_tbl AS SELECT circle(home_base) AS f1 FROM slow_emp4000; CREATE INDEX ggpolygonind ON gpolygon_tbl USING gist (f1); CREATE INDEX ggcircleind ON gcircle_tbl USING gist (f1); +-- get non-indexed results for comparison purposes SET enable_seqscan = ON; SET enable_indexscan = OFF; SET enable_bitmapscan = OFF; @@ -167,6 +169,44 @@ SELECT count(*) FROM point_tbl p WHERE p.f1 ~= '(-5, -12)'; 1 (1 row) +SELECT * FROM point_tbl ORDER BY f1 <-> '0,1'; + f1 +------------ + (0,0) + (-3,4) + (-10,0) + (10,10) + (-5,-12) + (5.1,34.5) + +(7 rows) + +SELECT * FROM point_tbl WHERE f1 IS NULL; + f1 +---- + +(1 row) + +SELECT * FROM point_tbl WHERE f1 IS NOT NULL ORDER BY f1 <-> '0,1'; + f1 +------------ + (0,0) + (-3,4) + (-10,0) + (10,10) + (-5,-12) + (5.1,34.5) +(6 rows) + +SELECT * FROM point_tbl WHERE f1 <@ '(-10,-10),(10,10)':: box ORDER BY f1 <-> '0,1'; + f1 +--------- + (0,0) + (-3,4) + (-10,0) + (10,10) +(4 rows) + SET enable_seqscan = OFF; SET enable_indexscan = ON; SET enable_bitmapscan = ON; @@ -435,6 +475,102 @@ SELECT count(*) FROM point_tbl p WHERE p.f1 ~= '(-5, -12)'; 1 (1 row) +EXPLAIN (COSTS OFF) +SELECT * FROM point_tbl ORDER BY f1 <-> '0,1'; + QUERY PLAN +----------------------------------------- + Index Scan using gpointind on point_tbl + Order By: (f1 <-> '(0,1)'::point) +(2 rows) + +SELECT * FROM point_tbl ORDER BY f1 <-> '0,1'; + f1 +------------ + (0,0) + (-3,4) + (-10,0) + (10,10) + (-5,-12) + (5.1,34.5) + +(7 rows) + +EXPLAIN (COSTS OFF) +SELECT * FROM point_tbl WHERE f1 IS NULL; + QUERY PLAN +----------------------------------------- + Index Scan using gpointind on point_tbl + Index Cond: (f1 IS NULL) +(2 rows) + +SELECT * FROM point_tbl WHERE f1 IS NULL; + f1 +---- + +(1 row) + +EXPLAIN (COSTS OFF) +SELECT * FROM point_tbl WHERE f1 IS NOT NULL ORDER BY f1 <-> '0,1'; + QUERY PLAN +----------------------------------------- + Index Scan using gpointind on point_tbl + Index Cond: (f1 IS NOT NULL) + Order By: (f1 <-> '(0,1)'::point) +(3 rows) + +SELECT * FROM point_tbl WHERE f1 IS NOT NULL ORDER BY f1 <-> '0,1'; + f1 +------------ + (0,0) + (-3,4) + (-10,0) + (10,10) + (-5,-12) + (5.1,34.5) +(6 rows) + +EXPLAIN (COSTS OFF) +SELECT * FROM point_tbl WHERE f1 <@ '(-10,-10),(10,10)':: box ORDER BY f1 <-> '0,1'; + QUERY PLAN +------------------------------------------------ + Index Scan using gpointind on point_tbl + Index Cond: (f1 <@ '(10,10),(-10,-10)'::box) + Order By: (f1 <-> '(0,1)'::point) +(3 rows) + +SELECT * FROM point_tbl WHERE f1 <@ '(-10,-10),(10,10)':: box ORDER BY f1 <-> '0,1'; + f1 +--------- + (0,0) + (-3,4) + (-10,0) + (10,10) +(4 rows) + +SET enable_seqscan = OFF; +SET enable_indexscan = OFF; +SET enable_bitmapscan = ON; +EXPLAIN (COSTS OFF) +SELECT * FROM point_tbl WHERE f1 <@ '(-10,-10),(10,10)':: box ORDER BY f1 <-> '0,1'; + QUERY PLAN +------------------------------------------------------------ + Sort + Sort Key: ((f1 <-> '(0,1)'::point)) + -> Bitmap Heap Scan on point_tbl + Recheck Cond: (f1 <@ '(10,10),(-10,-10)'::box) + -> Bitmap Index Scan on gpointind + Index Cond: (f1 <@ '(10,10),(-10,-10)'::box) +(6 rows) + +SELECT * FROM point_tbl WHERE f1 <@ '(-10,-10),(10,10)':: box ORDER BY f1 <-> '0,1'; + f1 +--------- + (0,0) + (-3,4) + (-10,0) + (10,10) +(4 rows) + RESET enable_seqscan; RESET enable_indexscan; RESET enable_bitmapscan; diff --git a/src/test/regress/expected/opr_sanity.out b/src/test/regress/expected/opr_sanity.out index 2176ea47da..f87cfb8009 100644 --- a/src/test/regress/expected/opr_sanity.out +++ b/src/test/regress/expected/opr_sanity.out @@ -991,6 +991,7 @@ ORDER BY 1, 2, 3; 783 | 12 | |&> 783 | 13 | ~ 783 | 14 | @ + 783 | 15 | <-> 783 | 27 | @> 783 | 28 | <@ 783 | 47 | @> @@ -1003,7 +1004,7 @@ ORDER BY 1, 2, 3; 2742 | 2 | @@@ 2742 | 3 | <@ 2742 | 4 | = -(39 rows) +(40 rows) -- Check that all opclass search operators have selectivity estimators. -- This is not absolutely required, but it seems a reasonable thing @@ -1136,11 +1137,11 @@ WHERE p1.amprocfamily = p3.oid AND p3.opfmethod = p2.oid AND -- Detect missing pg_amproc entries: should have as many support functions -- as AM expects for each datatype combination supported by the opfamily. --- GIN is a special case because it has an optional support function. +-- GIST/GIN are special cases because each has an optional support function. SELECT p1.amname, p2.opfname, p3.amproclefttype, p3.amprocrighttype FROM pg_am AS p1, pg_opfamily AS p2, pg_amproc AS p3 WHERE p2.opfmethod = p1.oid AND p3.amprocfamily = p2.oid AND - p1.amname <> 'gin' AND + p1.amname <> 'gist' AND p1.amname <> 'gin' AND p1.amsupport != (SELECT count(*) FROM pg_amproc AS p4 WHERE p4.amprocfamily = p2.oid AND p4.amproclefttype = p3.amproclefttype AND @@ -1149,26 +1150,27 @@ WHERE p2.opfmethod = p1.oid AND p3.amprocfamily = p2.oid AND --------+---------+----------------+----------------- (0 rows) --- Similar check for GIN, allowing one optional proc +-- Similar check for GIST/GIN, allowing one optional proc SELECT p1.amname, p2.opfname, p3.amproclefttype, p3.amprocrighttype FROM pg_am AS p1, pg_opfamily AS p2, pg_amproc AS p3 WHERE p2.opfmethod = p1.oid AND p3.amprocfamily = p2.oid AND - p1.amname = 'gin' AND - p1.amsupport - 1 > (SELECT count(*) FROM pg_amproc AS p4 - WHERE p4.amprocfamily = p2.oid AND - p4.amproclefttype = p3.amproclefttype AND - p4.amprocrighttype = p3.amprocrighttype); + (p1.amname = 'gist' OR p1.amname = 'gin') AND + (SELECT count(*) FROM pg_amproc AS p4 + WHERE p4.amprocfamily = p2.oid AND + p4.amproclefttype = p3.amproclefttype AND + p4.amprocrighttype = p3.amprocrighttype) + NOT IN (p1.amsupport, p1.amsupport - 1); amname | opfname | amproclefttype | amprocrighttype --------+---------+----------------+----------------- (0 rows) -- Also, check if there are any pg_opclass entries that don't seem to have --- pg_amproc support. Again, GIN has to be checked separately. +-- pg_amproc support. Again, GIST/GIN have to be checked specially. SELECT amname, opcname, count(*) FROM pg_am am JOIN pg_opclass op ON opcmethod = am.oid LEFT JOIN pg_amproc p ON amprocfamily = opcfamily AND amproclefttype = amprocrighttype AND amproclefttype = opcintype -WHERE am.amname <> 'gin' +WHERE am.amname <> 'gist' AND am.amname <> 'gin' GROUP BY amname, amsupport, opcname, amprocfamily HAVING count(*) != amsupport OR amprocfamily IS NULL; amname | opcname | count @@ -1179,9 +1181,10 @@ SELECT amname, opcname, count(*) FROM pg_am am JOIN pg_opclass op ON opcmethod = am.oid LEFT JOIN pg_amproc p ON amprocfamily = opcfamily AND amproclefttype = amprocrighttype AND amproclefttype = opcintype -WHERE am.amname = 'gin' +WHERE am.amname = 'gist' OR am.amname = 'gin' GROUP BY amname, amsupport, opcname, amprocfamily -HAVING count(*) < amsupport - 1 OR amprocfamily IS NULL; +HAVING (count(*) != amsupport AND count(*) != amsupport - 1) + OR amprocfamily IS NULL; amname | opcname | count --------+---------+------- (0 rows) diff --git a/src/test/regress/sql/create_index.sql b/src/test/regress/sql/create_index.sql index abf222de8e..043f433eb0 100644 --- a/src/test/regress/sql/create_index.sql +++ b/src/test/regress/sql/create_index.sql @@ -76,6 +76,8 @@ CREATE INDEX gpolygonind ON polygon_tbl USING gist (f1); CREATE INDEX gcircleind ON circle_tbl USING gist (f1); +INSERT INTO POINT_TBL(f1) VALUES (NULL); + CREATE INDEX gpointind ON point_tbl USING gist (f1); CREATE TEMP TABLE gpolygon_tbl AS @@ -90,6 +92,8 @@ CREATE INDEX ggpolygonind ON gpolygon_tbl USING gist (f1); CREATE INDEX ggcircleind ON gcircle_tbl USING gist (f1); +-- get non-indexed results for comparison purposes + SET enable_seqscan = ON; SET enable_indexscan = OFF; SET enable_bitmapscan = OFF; @@ -130,6 +134,14 @@ SELECT count(*) FROM point_tbl p WHERE p.f1 >^ '(0.0, 0.0)'; SELECT count(*) FROM point_tbl p WHERE p.f1 ~= '(-5, -12)'; +SELECT * FROM point_tbl ORDER BY f1 <-> '0,1'; + +SELECT * FROM point_tbl WHERE f1 IS NULL; + +SELECT * FROM point_tbl WHERE f1 IS NOT NULL ORDER BY f1 <-> '0,1'; + +SELECT * FROM point_tbl WHERE f1 <@ '(-10,-10),(10,10)':: box ORDER BY f1 <-> '0,1'; + SET enable_seqscan = OFF; SET enable_indexscan = ON; SET enable_bitmapscan = ON; @@ -206,6 +218,30 @@ EXPLAIN (COSTS OFF) SELECT count(*) FROM point_tbl p WHERE p.f1 ~= '(-5, -12)'; SELECT count(*) FROM point_tbl p WHERE p.f1 ~= '(-5, -12)'; +EXPLAIN (COSTS OFF) +SELECT * FROM point_tbl ORDER BY f1 <-> '0,1'; +SELECT * FROM point_tbl ORDER BY f1 <-> '0,1'; + +EXPLAIN (COSTS OFF) +SELECT * FROM point_tbl WHERE f1 IS NULL; +SELECT * FROM point_tbl WHERE f1 IS NULL; + +EXPLAIN (COSTS OFF) +SELECT * FROM point_tbl WHERE f1 IS NOT NULL ORDER BY f1 <-> '0,1'; +SELECT * FROM point_tbl WHERE f1 IS NOT NULL ORDER BY f1 <-> '0,1'; + +EXPLAIN (COSTS OFF) +SELECT * FROM point_tbl WHERE f1 <@ '(-10,-10),(10,10)':: box ORDER BY f1 <-> '0,1'; +SELECT * FROM point_tbl WHERE f1 <@ '(-10,-10),(10,10)':: box ORDER BY f1 <-> '0,1'; + +SET enable_seqscan = OFF; +SET enable_indexscan = OFF; +SET enable_bitmapscan = ON; + +EXPLAIN (COSTS OFF) +SELECT * FROM point_tbl WHERE f1 <@ '(-10,-10),(10,10)':: box ORDER BY f1 <-> '0,1'; +SELECT * FROM point_tbl WHERE f1 <@ '(-10,-10),(10,10)':: box ORDER BY f1 <-> '0,1'; + RESET enable_seqscan; RESET enable_indexscan; RESET enable_bitmapscan; diff --git a/src/test/regress/sql/opr_sanity.sql b/src/test/regress/sql/opr_sanity.sql index 1a023a088e..8da76ff3de 100644 --- a/src/test/regress/sql/opr_sanity.sql +++ b/src/test/regress/sql/opr_sanity.sql @@ -888,36 +888,37 @@ WHERE p1.amprocfamily = p3.oid AND p3.opfmethod = p2.oid AND -- Detect missing pg_amproc entries: should have as many support functions -- as AM expects for each datatype combination supported by the opfamily. --- GIN is a special case because it has an optional support function. +-- GIST/GIN are special cases because each has an optional support function. SELECT p1.amname, p2.opfname, p3.amproclefttype, p3.amprocrighttype FROM pg_am AS p1, pg_opfamily AS p2, pg_amproc AS p3 WHERE p2.opfmethod = p1.oid AND p3.amprocfamily = p2.oid AND - p1.amname <> 'gin' AND + p1.amname <> 'gist' AND p1.amname <> 'gin' AND p1.amsupport != (SELECT count(*) FROM pg_amproc AS p4 WHERE p4.amprocfamily = p2.oid AND p4.amproclefttype = p3.amproclefttype AND p4.amprocrighttype = p3.amprocrighttype); --- Similar check for GIN, allowing one optional proc +-- Similar check for GIST/GIN, allowing one optional proc SELECT p1.amname, p2.opfname, p3.amproclefttype, p3.amprocrighttype FROM pg_am AS p1, pg_opfamily AS p2, pg_amproc AS p3 WHERE p2.opfmethod = p1.oid AND p3.amprocfamily = p2.oid AND - p1.amname = 'gin' AND - p1.amsupport - 1 > (SELECT count(*) FROM pg_amproc AS p4 - WHERE p4.amprocfamily = p2.oid AND - p4.amproclefttype = p3.amproclefttype AND - p4.amprocrighttype = p3.amprocrighttype); + (p1.amname = 'gist' OR p1.amname = 'gin') AND + (SELECT count(*) FROM pg_amproc AS p4 + WHERE p4.amprocfamily = p2.oid AND + p4.amproclefttype = p3.amproclefttype AND + p4.amprocrighttype = p3.amprocrighttype) + NOT IN (p1.amsupport, p1.amsupport - 1); -- Also, check if there are any pg_opclass entries that don't seem to have --- pg_amproc support. Again, GIN has to be checked separately. +-- pg_amproc support. Again, GIST/GIN have to be checked specially. SELECT amname, opcname, count(*) FROM pg_am am JOIN pg_opclass op ON opcmethod = am.oid LEFT JOIN pg_amproc p ON amprocfamily = opcfamily AND amproclefttype = amprocrighttype AND amproclefttype = opcintype -WHERE am.amname <> 'gin' +WHERE am.amname <> 'gist' AND am.amname <> 'gin' GROUP BY amname, amsupport, opcname, amprocfamily HAVING count(*) != amsupport OR amprocfamily IS NULL; @@ -925,9 +926,10 @@ SELECT amname, opcname, count(*) FROM pg_am am JOIN pg_opclass op ON opcmethod = am.oid LEFT JOIN pg_amproc p ON amprocfamily = opcfamily AND amproclefttype = amprocrighttype AND amproclefttype = opcintype -WHERE am.amname = 'gin' +WHERE am.amname = 'gist' OR am.amname = 'gin' GROUP BY amname, amsupport, opcname, amprocfamily -HAVING count(*) < amsupport - 1 OR amprocfamily IS NULL; +HAVING (count(*) != amsupport AND count(*) != amsupport - 1) + OR amprocfamily IS NULL; -- Unfortunately, we can't check the amproc link very well because the -- signature of the function may be different for different support routines -- 2.11.0