From 2ab57e089b0b94ca43dadfb3d567ee59a7ca6276 Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Sat, 31 Jul 2010 00:30:54 +0000 Subject: [PATCH] Rewrite the key-combination logic in GIN's keyGetItem() and scanGetItem() routines to make them behave better in the presence of "lossy" index pointers. The previous coding was outright incorrect for some cases, as recently reported by Artur Dabrowski: scanGetItem would fail to return index entries in cases where one index key had multiple exact pointers on the same page as another key had a lossy pointer. Also, keyGetItem was extremely inefficient for cases where a single index key generates multiple "entry" streams, such as an @@ operator with a multiple-clause tsquery. The presence of a lossy page pointer in any one stream defeated its ability to use the opclass consistentFn, resulting in probing many heap pages that didn't really need to be visited. In Artur's example case, a query like WHERE tsvector @@ to_tsquery('a & b') was about 50X slower than the theoretically equivalent WHERE tsvector @@ to_tsquery('a') AND tsvector @@ to_tsquery('b') The way that I chose to fix this was to have GIN call the consistentFn twice with both TRUE and FALSE values for the in-doubt entry stream, returning a hit if either call produces TRUE, but not if they both return FALSE. The code handles this for the case of a single in-doubt entry stream, but punts (falling back to the stupid behavior) if there's more than one lossy reference to the same page. The idea could be scaled up to deal with multiple lossy references, but I think that would probably be wasted complexity. At least to judge by Artur's example, such cases don't occur often enough to be worth trying to optimize. Back-patch to 8.4. 8.3 did not have lossy GIN index pointers, so not subject to these problems. --- src/backend/access/gin/ginget.c | 354 ++++++++++++++++++++++++--------------- src/backend/access/gin/ginscan.c | 12 +- src/include/access/gin.h | 4 +- 3 files changed, 226 insertions(+), 144 deletions(-) diff --git a/src/backend/access/gin/ginget.c b/src/backend/access/gin/ginget.c index 705d167963..7547bc3891 100644 --- a/src/backend/access/gin/ginget.c +++ b/src/backend/access/gin/ginget.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.30 2010/02/26 02:00:33 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/access/gin/ginget.c,v 1.31 2010/07/31 00:30:54 tgl Exp $ *------------------------------------------------------------------------- */ @@ -412,7 +412,6 @@ startScanKey(Relation index, GinState *ginstate, GinScanKey key) for (i = 0; i < key->nentries; i++) startScanEntry(index, ginstate, key->scanEntry + i); - memset(key->entryRes, TRUE, sizeof(bool) * key->nentries); key->isFinished = FALSE; key->firstCall = FALSE; @@ -461,11 +460,9 @@ entryGetNextItem(Relation index, GinScanEntry entry) for (;;) { - entry->offset++; - - if (entry->offset <= entry->nlist) + if (entry->offset < entry->nlist) { - entry->curItem = entry->list[entry->offset - 1]; + entry->curItem = entry->list[entry->offset++]; return; } @@ -484,7 +481,7 @@ entryGetNextItem(Relation index, GinScanEntry entry) if (blkno == InvalidBlockNumber) { ReleaseBuffer(entry->buffer); - ItemPointerSet(&entry->curItem, InvalidBlockNumber, InvalidOffsetNumber); + ItemPointerSetInvalid(&entry->curItem); entry->buffer = InvalidBuffer; entry->isFinished = TRUE; return; @@ -495,7 +492,8 @@ entryGetNextItem(Relation index, GinScanEntry entry) page = BufferGetPage(entry->buffer); entry->offset = InvalidOffsetNumber; - if (!ItemPointerIsValid(&entry->curItem) || findItemInPage(page, &entry->curItem, &entry->offset)) + if (!ItemPointerIsValid(&entry->curItem) || + findItemInPage(page, &entry->curItem, &entry->offset)) { /* * Found position equal to or greater than stored @@ -507,7 +505,8 @@ entryGetNextItem(Relation index, GinScanEntry entry) LockBuffer(entry->buffer, GIN_UNLOCK); if (!ItemPointerIsValid(&entry->curItem) || - compareItemPointers(&entry->curItem, entry->list + entry->offset - 1) == 0) + compareItemPointers(&entry->curItem, + entry->list + entry->offset - 1) == 0) { /* * First pages are deleted or empty, or we found exact @@ -532,12 +531,14 @@ entryGetNextItem(Relation index, GinScanEntry entry) #define dropItem(e) ( gin_rand() > ((double)GinFuzzySearchLimit)/((double)((e)->predictNumberResult)) ) /* - * Sets entry->curItem to new found heap item pointer for one - * entry of one scan key + * Sets entry->curItem to next heap item pointer for one entry of one scan key, + * or sets entry->isFinished to TRUE if there are no more. */ -static bool +static void entryGetItem(Relation index, GinScanEntry entry) { + Assert(!entry->isFinished); + if (entry->master) { entry->isFinished = entry->master->isFinished; @@ -554,7 +555,7 @@ entryGetItem(Relation index, GinScanEntry entry) if (entry->partialMatchResult == NULL) { - ItemPointerSet(&entry->curItem, InvalidBlockNumber, InvalidOffsetNumber); + ItemPointerSetInvalid(&entry->curItem); tbm_end_iterate(entry->partialMatchIterator); entry->partialMatchIterator = NULL; entry->isFinished = TRUE; @@ -600,7 +601,7 @@ entryGetItem(Relation index, GinScanEntry entry) entry->curItem = entry->list[entry->offset - 1]; else { - ItemPointerSet(&entry->curItem, InvalidBlockNumber, InvalidOffsetNumber); + ItemPointerSetInvalid(&entry->curItem); entry->isFinished = TRUE; } } @@ -609,127 +610,175 @@ entryGetItem(Relation index, GinScanEntry entry) do { entryGetNextItem(index, entry); - } while (entry->isFinished == FALSE && entry->reduceResult == TRUE && dropItem(entry)); + } while (entry->isFinished == FALSE && + entry->reduceResult == TRUE && + dropItem(entry)); } - - return entry->isFinished; } /* - * Sets key->curItem to new found heap item pointer for one scan key - * Returns isFinished, ie TRUE means we did NOT get a new item pointer! - * Also, *keyrecheck is set true if recheck is needed for this scan key. - * Note: lossy page could be returned after items from the same page. + * Sets key->curItem to next heap item pointer for one scan key, advancing + * past any item pointers <= advancePast. + * Sets key->isFinished to TRUE if there are no more. + * + * On success, key->recheckCurItem is set true iff recheck is needed for this + * item pointer (including the case where the item pointer is a lossy page + * pointer). + * + * Note: lossy page could be returned after single items from the same page. + * This is OK since the results will just be used to build a bitmap; we'll + * set a bitmap entry more than once, but never actually return a row twice. */ -static bool +static void keyGetItem(Relation index, GinState *ginstate, MemoryContext tempCtx, - GinScanKey key, bool *keyrecheck) + GinScanKey key, ItemPointer advancePast) { + ItemPointerData myAdvancePast = *advancePast; uint32 i; + uint32 lossyEntry; + bool haveLossyEntry; GinScanEntry entry; bool res; MemoryContext oldCtx; - if (key->isFinished) - return TRUE; + Assert(!key->isFinished); do { /* - * move forward from previously value and set new curItem, which is - * minimal from entries->curItems. Lossy page is encoded by - * ItemPointer with max value for offset (0xffff), so if there is an - * non-lossy entries on lossy page they will returned too and after - * that the whole page. That's not a problem for resulting tidbitmap. + * Advance any entries that are <= myAdvancePast. In particular, + * since entry->curItem was initialized with ItemPointerSetMin, this + * ensures we fetch the first item for each entry on the first call. + * Then set key->curItem to the minimum of the valid entry curItems. + * + * Note: a lossy-page entry is encoded by a ItemPointer with max value + * for offset (0xffff), so that it will sort after any exact entries + * for the same page. So we'll prefer to return exact pointers not + * lossy pointers, which is good. Also, when we advance past an exact + * entry after processing it, we will not advance past lossy entries + * for the same page in other keys, which is NECESSARY for correct + * results (since we might have additional entries for the same page + * in the first key). */ ItemPointerSetMax(&key->curItem); + for (i = 0; i < key->nentries; i++) { entry = key->scanEntry + i; - if (key->entryRes[i]) - { - /* - * Move forward only entries which was the least on previous - * call, key->entryRes[i] points that current entry was a - * result of loop/call. - */ - if (entry->isFinished == FALSE && entryGetItem(index, entry) == FALSE) - { - if (compareItemPointers(&entry->curItem, &key->curItem) < 0) - key->curItem = entry->curItem; - } - else - key->entryRes[i] = FALSE; - } - else if (entry->isFinished == FALSE) - { - if (compareItemPointers(&entry->curItem, &key->curItem) < 0) - key->curItem = entry->curItem; - } + while (entry->isFinished == FALSE && + compareItemPointers(&entry->curItem, &myAdvancePast) <= 0) + entryGetItem(index, entry); + + if (entry->isFinished == FALSE && + compareItemPointers(&entry->curItem, &key->curItem) < 0) + key->curItem = entry->curItem; } if (ItemPointerIsMax(&key->curItem)) { /* all entries are finished */ key->isFinished = TRUE; - return TRUE; + return; } /* - * Now key->curItem contains closest ItemPointer to previous result. - * - * if key->nentries == 1 then the consistentFn should always succeed, - * but we must call it anyway to find out the recheck status. + * Now key->curItem contains first ItemPointer after previous result. + * Advance myAdvancePast to this value, so that if the consistentFn + * rejects the entry and we loop around again, we will advance to the + * next available item pointer. */ + myAdvancePast = key->curItem; - /*---------- - * entryRes array is used for: - * - as an argument for consistentFn - * - entry->curItem with corresponding key->entryRes[i] == false are - * greater than key->curItem, so next loop/call they should be - * renewed by entryGetItem(). So, we need to set up an array before - * checking of lossy page. - *---------- + /* + * Prepare entryRes array to be passed to consistentFn. + * + * If key->nentries == 1 then the consistentFn should always succeed, + * but we must call it anyway to find out the recheck status. + * + * Lossy-page entries pose a problem, since we don't know the correct + * entryRes state to pass to the consistentFn, and we also don't know + * what its combining logic will be (could be AND, OR, or even NOT). + * Our strategy for a single lossy-page entry is to try the + * consistentFn both ways and return a hit if it accepts either one + * (forcing the hit to be marked lossy so it will be rechecked). + * + * This idea could be generalized to more than one lossy-page entry, + * but ideally lossy-page entries should be infrequent so it would + * seldom be the case that we have more than one. If we do find more + * than one, we just punt and return the item as lossy. + * + * Note that only lossy-page entries pointing to the current item's + * page should trigger this processing. */ + lossyEntry = 0; + haveLossyEntry = false; for (i = 0; i < key->nentries; i++) { entry = key->scanEntry + i; - if (entry->isFinished == FALSE && - compareItemPointers(&entry->curItem, &key->curItem) == 0) + if (entry->isFinished) + key->entryRes[i] = FALSE; + else if (ItemPointerIsLossyPage(&entry->curItem) && + GinItemPointerGetBlockNumber(&entry->curItem) == + GinItemPointerGetBlockNumber(&key->curItem)) + { + if (haveLossyEntry) + { + /* Too many lossy entries, punt */ + key->recheckCurItem = true; + return; + } + lossyEntry = i; + haveLossyEntry = true; + /* initially assume TRUE */ + key->entryRes[i] = TRUE; + } + else if (compareItemPointers(&entry->curItem, &key->curItem) == 0) key->entryRes[i] = TRUE; else key->entryRes[i] = FALSE; } + oldCtx = MemoryContextSwitchTo(tempCtx); + /* - * Initialize *keyrecheck in case the consistentFn doesn't know it + * Initialize recheckCurItem in case the consistentFn doesn't know it * should set it. The safe assumption in that case is to force * recheck. */ - *keyrecheck = true; - - /* - * If one of the entry's scans returns lossy result, return it without - * further checking - we can't call consistentFn for lack of data. - */ - if (ItemPointerIsLossyPage(&key->curItem)) - return FALSE; + key->recheckCurItem = true; - oldCtx = MemoryContextSwitchTo(tempCtx); res = DatumGetBool(FunctionCall6(&ginstate->consistentFn[key->attnum - 1], PointerGetDatum(key->entryRes), UInt16GetDatum(key->strategy), key->query, UInt32GetDatum(key->nentries), PointerGetDatum(key->extra_data), - PointerGetDatum(keyrecheck))); + PointerGetDatum(&key->recheckCurItem))); + + if (!res && haveLossyEntry) + { + /* try the other way for the lossy item */ + key->entryRes[lossyEntry] = FALSE; + key->recheckCurItem = true; + + res = DatumGetBool(FunctionCall6(&ginstate->consistentFn[key->attnum - 1], + PointerGetDatum(key->entryRes), + UInt16GetDatum(key->strategy), + key->query, + UInt32GetDatum(key->nentries), + PointerGetDatum(key->extra_data), + PointerGetDatum(&key->recheckCurItem))); + } + MemoryContextSwitchTo(oldCtx); MemoryContextReset(tempCtx); - } while (!res); - return FALSE; + /* If we matched a lossy entry, force recheckCurItem = true */ + if (haveLossyEntry) + key->recheckCurItem = true; + } while (!res); } @@ -904,7 +953,7 @@ collectDatumForItem(IndexScanDesc scan, pendingPosition *pos) j; /* - * Resets entryRes + * Reset entryRes */ for (i = 0; i < so->nkeys; i++) { @@ -1145,75 +1194,103 @@ scanPendingInsert(IndexScanDesc scan, TIDBitmap *tbm, int64 *ntids) } /* - * Get heap item pointer from scan - * returns true if found + * Get next heap item pointer (after advancePast) from scan. + * Returns true if anything found. + * On success, *item and *recheck are set. + * + * Note: this is very nearly the same logic as in keyGetItem(), except + * that we know the keys are to be combined with AND logic, whereas in + * keyGetItem() the combination logic is known only to the consistentFn. */ static bool -scanGetItem(IndexScanDesc scan, ItemPointerData *item, bool *recheck) +scanGetItem(IndexScanDesc scan, ItemPointer advancePast, + ItemPointerData *item, bool *recheck) { GinScanOpaque so = (GinScanOpaque) scan->opaque; + ItemPointerData myAdvancePast = *advancePast; uint32 i; - bool keyrecheck; + bool match; + + for (;;) + { + /* + * Advance any keys that are <= myAdvancePast. In particular, + * since key->curItem was initialized with ItemPointerSetMin, this + * ensures we fetch the first item for each key on the first call. + * Then set *item to the minimum of the key curItems. + * + * Note: a lossy-page entry is encoded by a ItemPointer with max value + * for offset (0xffff), so that it will sort after any exact entries + * for the same page. So we'll prefer to return exact pointers not + * lossy pointers, which is good. Also, when we advance past an exact + * entry after processing it, we will not advance past lossy entries + * for the same page in other keys, which is NECESSARY for correct + * results (since we might have additional entries for the same page + * in the first key). + */ + ItemPointerSetMax(item); + + for (i = 0; i < so->nkeys; i++) + { + GinScanKey key = so->keys + i; + + while (key->isFinished == FALSE && + compareItemPointers(&key->curItem, &myAdvancePast) <= 0) + keyGetItem(scan->indexRelation, &so->ginstate, so->tempCtx, + key, &myAdvancePast); + + if (key->isFinished) + return FALSE; /* finished one of keys */ + + if (compareItemPointers(&key->curItem, item) < 0) + *item = key->curItem; + } + + Assert(!ItemPointerIsMax(item)); + + /* + * Now *item contains first ItemPointer after previous result. + * + * The item is a valid hit only if all the keys returned either + * that exact TID, or a lossy reference to the same page. + */ + match = true; + for (i = 0; i < so->nkeys; i++) + { + GinScanKey key = so->keys + i; + + if (compareItemPointers(item, &key->curItem) == 0) + continue; + if (ItemPointerIsLossyPage(&key->curItem) && + GinItemPointerGetBlockNumber(&key->curItem) == + GinItemPointerGetBlockNumber(item)) + continue; + match = false; + break; + } + + if (match) + break; + + /* + * No hit. Update myAdvancePast to this TID, so that on the next + * pass we'll move to the next possible entry. + */ + myAdvancePast = *item; + } /* - * We return recheck = true if any of the keyGetItem calls return - * keyrecheck = true. Note that because the second loop might advance - * some keys, this could theoretically be too conservative. In practice - * though, we expect that a consistentFn's recheck result will depend only - * on the operator and the query, so for any one key it should stay the - * same regardless of advancing to new items. So it's not worth working - * harder. + * We must return recheck = true if any of the keys are marked recheck. */ *recheck = false; - - ItemPointerSetMin(item); for (i = 0; i < so->nkeys; i++) { GinScanKey key = so->keys + i; - if (keyGetItem(scan->indexRelation, &so->ginstate, so->tempCtx, - key, &keyrecheck)) - return FALSE; /* finished one of keys */ - if (compareItemPointers(item, &key->curItem) < 0) - *item = key->curItem; - *recheck |= keyrecheck; - } - - for (i = 1; i <= so->nkeys; i++) - { - GinScanKey key = so->keys + i - 1; - - for (;;) + if (key->recheckCurItem) { - int cmp = compareItemPointers(item, &key->curItem); - - if (cmp != 0 && (ItemPointerIsLossyPage(item) || ItemPointerIsLossyPage(&key->curItem))) - { - /* - * if one of ItemPointers points to the whole page then - * compare only page's number - */ - if (ItemPointerGetBlockNumber(item) == ItemPointerGetBlockNumber(&key->curItem)) - cmp = 0; - else - cmp = (ItemPointerGetBlockNumber(item) > ItemPointerGetBlockNumber(&key->curItem)) ? 1 : -1; - } - - if (cmp == 0) - break; - else if (cmp > 0) - { - if (keyGetItem(scan->indexRelation, &so->ginstate, so->tempCtx, - key, &keyrecheck)) - return FALSE; /* finished one of keys */ - *recheck |= keyrecheck; - } - else - { /* returns to begin */ - *item = key->curItem; - i = 0; - break; - } + *recheck = true; + break; } } @@ -1229,6 +1306,8 @@ gingetbitmap(PG_FUNCTION_ARGS) IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); TIDBitmap *tbm = (TIDBitmap *) PG_GETARG_POINTER(1); int64 ntids; + ItemPointerData iptr; + bool recheck; if (GinIsNewKey(scan)) newScanKey(scan); @@ -1255,14 +1334,13 @@ gingetbitmap(PG_FUNCTION_ARGS) */ startScan(scan); + ItemPointerSetMin(&iptr); + for (;;) { - ItemPointerData iptr; - bool recheck; - CHECK_FOR_INTERRUPTS(); - if (!scanGetItem(scan, &iptr, &recheck)) + if (!scanGetItem(scan, &iptr, &iptr, &recheck)) break; if (ItemPointerIsLossyPage(&iptr)) diff --git a/src/backend/access/gin/ginscan.c b/src/backend/access/gin/ginscan.c index 1aa4ea9082..c9cefa8e80 100644 --- a/src/backend/access/gin/ginscan.c +++ b/src/backend/access/gin/ginscan.c @@ -8,7 +8,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/access/gin/ginscan.c,v 1.26 2010/01/18 11:50:43 teodor Exp $ + * $PostgreSQL: pgsql/src/backend/access/gin/ginscan.c,v 1.27 2010/07/31 00:30:54 tgl Exp $ *------------------------------------------------------------------------- */ @@ -51,7 +51,7 @@ fillScanKey(GinState *ginstate, GinScanKey key, OffsetNumber attnum, Datum query key->extra_data = extra_data; key->query = query; key->firstCall = TRUE; - ItemPointerSet(&(key->curItem), InvalidBlockNumber, InvalidOffsetNumber); + ItemPointerSetMin(&key->curItem); for (i = 0; i < nEntryValues; i++) { @@ -59,7 +59,8 @@ fillScanKey(GinState *ginstate, GinScanKey key, OffsetNumber attnum, Datum query key->scanEntry[i].entry = entryValues[i]; key->scanEntry[i].attnum = attnum; key->scanEntry[i].extra_data = (extra_data) ? extra_data[i] : NULL; - ItemPointerSet(&(key->scanEntry[i].curItem), InvalidBlockNumber, InvalidOffsetNumber); + ItemPointerSetMin(&key->scanEntry[i].curItem); + key->scanEntry[i].isFinished = FALSE; key->scanEntry[i].offset = InvalidOffsetNumber; key->scanEntry[i].buffer = InvalidBuffer; key->scanEntry[i].partialMatch = NULL; @@ -100,14 +101,15 @@ resetScanKeys(GinScanKey keys, uint32 nkeys) GinScanKey key = keys + i; key->firstCall = TRUE; - ItemPointerSet(&(key->curItem), InvalidBlockNumber, InvalidOffsetNumber); + ItemPointerSetMin(&key->curItem); for (j = 0; j < key->nentries; j++) { if (key->scanEntry[j].buffer != InvalidBuffer) ReleaseBuffer(key->scanEntry[i].buffer); - ItemPointerSet(&(key->scanEntry[j].curItem), InvalidBlockNumber, InvalidOffsetNumber); + ItemPointerSetMin(&key->scanEntry[j].curItem); + key->scanEntry[j].isFinished = FALSE; key->scanEntry[j].offset = InvalidOffsetNumber; key->scanEntry[j].buffer = InvalidBuffer; key->scanEntry[j].list = NULL; diff --git a/src/include/access/gin.h b/src/include/access/gin.h index c935838576..236c135568 100644 --- a/src/include/access/gin.h +++ b/src/include/access/gin.h @@ -4,7 +4,7 @@ * * Copyright (c) 2006-2010, PostgreSQL Global Development Group * - * $PostgreSQL: pgsql/src/include/access/gin.h,v 1.38 2010/02/26 02:01:20 momjian Exp $ + * $PostgreSQL: pgsql/src/include/access/gin.h,v 1.39 2010/07/31 00:30:54 tgl Exp $ *-------------------------------------------------------------------------- */ #ifndef GIN_H @@ -520,6 +520,8 @@ typedef struct GinScanKeyData OffsetNumber attnum; ItemPointerData curItem; + bool recheckCurItem; + bool firstCall; bool isFinished; } GinScanKeyData; -- 2.11.0