* visibilitymap.c
* bitmap for tracking visibility of heap tuples
*
- * Portions Copyright (c) 1996-2010, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
- * $PostgreSQL: pgsql/src/backend/access/heap/visibilitymap.c,v 1.9 2010/02/26 02:00:33 momjian Exp $
+ * src/backend/access/heap/visibilitymap.c
*
* INTERFACE ROUTINES
- * visibilitymap_clear - clear a bit in the visibility map
- * visibilitymap_pin - pin a map page for setting a bit
- * visibilitymap_set - set a bit in a previously pinned page
- * visibilitymap_test - test if a bit is set
+ * visibilitymap_clear - clear a bit in the visibility map
+ * visibilitymap_pin - pin a map page for setting a bit
+ * visibilitymap_pin_ok - check whether correct map page is already pinned
+ * visibilitymap_set - set a bit in a previously pinned page
+ * visibilitymap_test - test if a bit is set
*
* NOTES
*
* It would be nice to use the visibility map to skip visibility checks in
* index scans.
*
- * Currently, the visibility map is not 100% correct all the time.
- * During updates, the bit in the visibility map is cleared after releasing
- * the lock on the heap page. During the window between releasing the lock
- * and clearing the bit in the visibility map, the bit in the visibility map
- * is set, but the new insertion or deletion is not yet visible to other
- * backends.
- *
- * That might actually be OK for the index scans, though. The newly inserted
- * tuple wouldn't have an index pointer yet, so all tuples reachable from an
- * index would still be visible to all other backends, and deletions wouldn't
- * be visible to other backends yet. (But HOT breaks that argument, no?)
- *
- * There's another hole in the way the PD_ALL_VISIBLE flag is set. When
- * vacuum observes that all tuples are visible to all, it sets the flag on
- * the heap page, and also sets the bit in the visibility map. If we then
- * crash, and only the visibility map page was flushed to disk, we'll have
- * a bit set in the visibility map, but the corresponding flag on the heap
- * page is not set. If the heap page is then updated, the updater won't
- * know to clear the bit in the visibility map. (Isn't that prevented by
- * the LSN interlock?)
- *
*-------------------------------------------------------------------------
*/
#include "postgres.h"
+#include "access/heapam.h"
#include "access/visibilitymap.h"
+#include "miscadmin.h"
#include "storage/bufmgr.h"
#include "storage/bufpage.h"
#include "storage/lmgr.h"
/*
* visibilitymap_clear - clear a bit in visibility map
*
- * Clear a bit in the visibility map, marking that not all tuples are
- * visible to all transactions anymore.
+ * You must pass a buffer containing the correct map page to this function.
+ * Call visibilitymap_pin first to pin the right one. This function doesn't do
+ * any I/O.
*/
void
-visibilitymap_clear(Relation rel, BlockNumber heapBlk)
+visibilitymap_clear(Relation rel, BlockNumber heapBlk, Buffer buf)
{
BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
int mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
int mapBit = HEAPBLK_TO_MAPBIT(heapBlk);
uint8 mask = 1 << mapBit;
- Buffer mapBuffer;
char *map;
#ifdef TRACE_VISIBILITYMAP
elog(DEBUG1, "vm_clear %s %d", RelationGetRelationName(rel), heapBlk);
#endif
- mapBuffer = vm_readbuf(rel, mapBlock, false);
- if (!BufferIsValid(mapBuffer))
- return; /* nothing to do */
+ if (!BufferIsValid(buf) || BufferGetBlockNumber(buf) != mapBlock)
+ elog(ERROR, "wrong buffer passed to visibilitymap_clear");
- LockBuffer(mapBuffer, BUFFER_LOCK_EXCLUSIVE);
- map = PageGetContents(BufferGetPage(mapBuffer));
+ LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
+ map = PageGetContents(BufferGetPage(buf));
if (map[mapByte] & mask)
{
map[mapByte] &= ~mask;
- MarkBufferDirty(mapBuffer);
+ MarkBufferDirty(buf);
}
- UnlockReleaseBuffer(mapBuffer);
+ LockBuffer(buf, BUFFER_LOCK_UNLOCK);
}
/*
* On entry, *buf should be InvalidBuffer or a valid buffer returned by
* an earlier call to visibilitymap_pin or visibilitymap_test on the same
* relation. On return, *buf is a valid buffer with the map page containing
- * the the bit for heapBlk.
+ * the bit for heapBlk.
*
* If the page doesn't exist in the map file yet, it is extended.
*/
}
/*
+ * visibilitymap_pin_ok - do we already have the correct page pinned?
+ *
+ * On entry, buf should be InvalidBuffer or a valid buffer returned by
+ * an earlier call to visibilitymap_pin or visibilitymap_test on the same
+ * relation. The return value indicates whether the buffer covers the
+ * given heapBlk.
+ */
+bool
+visibilitymap_pin_ok(BlockNumber heapBlk, Buffer buf)
+{
+ BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
+
+ return BufferIsValid(buf) && BufferGetBlockNumber(buf) == mapBlock;
+}
+
+/*
* visibilitymap_set - set a bit on a previously pinned page
*
- * recptr is the LSN of the heap page. The LSN of the visibility map page is
- * advanced to that, to make sure that the visibility map doesn't get flushed
- * to disk before the update to the heap page that made all tuples visible.
+ * recptr is the LSN of the XLOG record we're replaying, if we're in recovery,
+ * or InvalidXLogRecPtr in normal running. The page LSN is advanced to the
+ * one provided; in normal running, we generate a new XLOG record and set the
+ * page LSN to that value.
*
- * This is an opportunistic function. It does nothing, unless *buf
- * contains the bit for heapBlk. Call visibilitymap_pin first to pin
- * the right map page. This function doesn't do any I/O.
+ * You must pass a buffer containing the correct map page to this function.
+ * Call visibilitymap_pin first to pin the right one. This function doesn't do
+ * any I/O.
*/
void
visibilitymap_set(Relation rel, BlockNumber heapBlk, XLogRecPtr recptr,
- Buffer *buf)
+ Buffer buf)
{
BlockNumber mapBlock = HEAPBLK_TO_MAPBLOCK(heapBlk);
uint32 mapByte = HEAPBLK_TO_MAPBYTE(heapBlk);
elog(DEBUG1, "vm_set %s %d", RelationGetRelationName(rel), heapBlk);
#endif
+ Assert(InRecovery || XLogRecPtrIsInvalid(recptr));
+
/* Check that we have the right page pinned */
- if (!BufferIsValid(*buf) || BufferGetBlockNumber(*buf) != mapBlock)
- return;
+ if (!BufferIsValid(buf) || BufferGetBlockNumber(buf) != mapBlock)
+ elog(ERROR, "wrong buffer passed to visibilitymap_set");
- page = BufferGetPage(*buf);
+ page = BufferGetPage(buf);
map = PageGetContents(page);
- LockBuffer(*buf, BUFFER_LOCK_EXCLUSIVE);
+ LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
if (!(map[mapByte] & (1 << mapBit)))
{
+ START_CRIT_SECTION();
+
map[mapByte] |= (1 << mapBit);
+ MarkBufferDirty(buf);
- if (XLByteLT(PageGetLSN(page), recptr))
+ if (RelationNeedsWAL(rel))
+ {
+ if (XLogRecPtrIsInvalid(recptr))
+ recptr = log_heap_visible(rel->rd_node, heapBlk, buf);
PageSetLSN(page, recptr);
- PageSetTLI(page, ThisTimeLineID);
- MarkBufferDirty(*buf);
+ PageSetTLI(page, ThisTimeLineID);
+ }
+
+ END_CRIT_SECTION();
}
- LockBuffer(*buf, BUFFER_LOCK_UNLOCK);
+ LockBuffer(buf, BUFFER_LOCK_UNLOCK);
}
/*
* On entry, *buf should be InvalidBuffer or a valid buffer returned by an
* earlier call to visibilitymap_pin or visibilitymap_test on the same
* relation. On return, *buf is a valid buffer with the map page containing
- * the the bit for heapBlk, or InvalidBuffer. The caller is responsible for
+ * the bit for heapBlk, or InvalidBuffer. The caller is responsible for
* releasing *buf after it's done testing and setting bits.
*/
bool
}
/* Truncate the unused VM pages, and send smgr inval message */
- smgrtruncate(rel->rd_smgr, VISIBILITYMAP_FORKNUM, newnblocks,
- rel->rd_istemp);
+ smgrtruncate(rel->rd_smgr, VISIBILITYMAP_FORKNUM, newnblocks);
/*
* We might as well update the local smgr_vm_nblocks setting. smgrtruncate
while (vm_nblocks_now < vm_nblocks)
{
smgrextend(rel->rd_smgr, VISIBILITYMAP_FORKNUM, vm_nblocks_now,
- (char *) pg, rel->rd_istemp);
+ (char *) pg, false);
vm_nblocks_now++;
}