OSDN Git Service

916340288a7d88e9fe2f35b684dac8e7c18195ba
[pg-rex/syncrep.git] / src / backend / executor / nodeBitmapHeapscan.c
1 /*-------------------------------------------------------------------------
2  *
3  * nodeBitmapHeapscan.c
4  *        Routines to support bitmapped scans of relations
5  *
6  * NOTE: it is critical that this plan type only be used with MVCC-compliant
7  * snapshots (ie, regular snapshots, not SnapshotNow or one of the other
8  * special snapshots).  The reason is that since index and heap scans are
9  * decoupled, there can be no assurance that the index tuple prompting a
10  * visit to a particular heap TID still exists when the visit is made.
11  * Therefore the tuple might not exist anymore either (which is OK because
12  * heap_fetch will cope) --- but worse, the tuple slot could have been
13  * re-used for a newer tuple.  With an MVCC snapshot the newer tuple is
14  * certain to fail the time qual and so it will not be mistakenly returned.
15  * With SnapshotNow we might return a tuple that doesn't meet the required
16  * index qual conditions.
17  *
18  *
19  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
20  * Portions Copyright (c) 1994, Regents of the University of California
21  *
22  *
23  * IDENTIFICATION
24  *        $PostgreSQL: pgsql/src/backend/executor/nodeBitmapHeapscan.c,v 1.28 2008/05/13 15:44:08 momjian Exp $
25  *
26  *-------------------------------------------------------------------------
27  */
28 /*
29  * INTERFACE ROUTINES
30  *              ExecBitmapHeapScan                      scans a relation using bitmap info
31  *              ExecBitmapHeapNext                      workhorse for above
32  *              ExecInitBitmapHeapScan          creates and initializes state info.
33  *              ExecBitmapHeapReScan            prepares to rescan the plan.
34  *              ExecEndBitmapHeapScan           releases all storage.
35  */
36 #include "postgres.h"
37
38 #include "access/heapam.h"
39 #include "executor/execdebug.h"
40 #include "executor/nodeBitmapHeapscan.h"
41 #include "pgstat.h"
42 #include "storage/bufmgr.h"
43 #include "utils/memutils.h"
44 #include "utils/snapmgr.h"
45 #include "utils/tqual.h"
46
47
48 static TupleTableSlot *BitmapHeapNext(BitmapHeapScanState *node);
49 static void bitgetpage(HeapScanDesc scan, TBMIterateResult *tbmres);
50
51
52 /* ----------------------------------------------------------------
53  *              BitmapHeapNext
54  *
55  *              Retrieve next tuple from the BitmapHeapScan node's currentRelation
56  * ----------------------------------------------------------------
57  */
58 static TupleTableSlot *
59 BitmapHeapNext(BitmapHeapScanState *node)
60 {
61         EState     *estate;
62         ExprContext *econtext;
63         HeapScanDesc scan;
64         Index           scanrelid;
65         TIDBitmap  *tbm;
66         TBMIterateResult *tbmres;
67         OffsetNumber targoffset;
68         TupleTableSlot *slot;
69
70         /*
71          * extract necessary information from index scan node
72          */
73         estate = node->ss.ps.state;
74         econtext = node->ss.ps.ps_ExprContext;
75         slot = node->ss.ss_ScanTupleSlot;
76         scan = node->ss.ss_currentScanDesc;
77         scanrelid = ((BitmapHeapScan *) node->ss.ps.plan)->scan.scanrelid;
78         tbm = node->tbm;
79         tbmres = node->tbmres;
80
81         /*
82          * Check if we are evaluating PlanQual for tuple of this relation.
83          * Additional checking is not good, but no other way for now. We could
84          * introduce new nodes for this case and handle IndexScan --> NewNode
85          * switching in Init/ReScan plan...
86          */
87         if (estate->es_evTuple != NULL &&
88                 estate->es_evTuple[scanrelid - 1] != NULL)
89         {
90                 if (estate->es_evTupleNull[scanrelid - 1])
91                         return ExecClearTuple(slot);
92
93                 ExecStoreTuple(estate->es_evTuple[scanrelid - 1],
94                                            slot, InvalidBuffer, false);
95
96                 /* Does the tuple meet the original qual conditions? */
97                 econtext->ecxt_scantuple = slot;
98
99                 ResetExprContext(econtext);
100
101                 if (!ExecQual(node->bitmapqualorig, econtext, false))
102                         ExecClearTuple(slot);           /* would not be returned by scan */
103
104                 /* Flag for the next call that no more tuples */
105                 estate->es_evTupleNull[scanrelid - 1] = true;
106
107                 return slot;
108         }
109
110         /*
111          * If we haven't yet performed the underlying index scan, do it, and
112          * prepare the bitmap to be iterated over.
113          */
114         if (tbm == NULL)
115         {
116                 tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
117
118                 if (!tbm || !IsA(tbm, TIDBitmap))
119                         elog(ERROR, "unrecognized result from subplan");
120
121                 node->tbm = tbm;
122                 node->tbmres = tbmres = NULL;
123
124                 tbm_begin_iterate(tbm);
125         }
126
127         for (;;)
128         {
129                 Page            dp;
130                 ItemId          lp;
131
132                 /*
133                  * Get next page of results if needed
134                  */
135                 if (tbmres == NULL)
136                 {
137                         node->tbmres = tbmres = tbm_iterate(tbm);
138                         if (tbmres == NULL)
139                         {
140                                 /* no more entries in the bitmap */
141                                 break;
142                         }
143
144                         /*
145                          * Ignore any claimed entries past what we think is the end of the
146                          * relation.  (This is probably not necessary given that we got at
147                          * least AccessShareLock on the table before performing any of the
148                          * indexscans, but let's be safe.)
149                          */
150                         if (tbmres->blockno >= scan->rs_nblocks)
151                         {
152                                 node->tbmres = tbmres = NULL;
153                                 continue;
154                         }
155
156                         /*
157                          * Fetch the current heap page and identify candidate tuples.
158                          */
159                         bitgetpage(scan, tbmres);
160
161                         /*
162                          * Set rs_cindex to first slot to examine
163                          */
164                         scan->rs_cindex = 0;
165                 }
166                 else
167                 {
168                         /*
169                          * Continuing in previously obtained page; advance rs_cindex
170                          */
171                         scan->rs_cindex++;
172                 }
173
174                 /*
175                  * Out of range?  If so, nothing more to look at on this page
176                  */
177                 if (scan->rs_cindex < 0 || scan->rs_cindex >= scan->rs_ntuples)
178                 {
179                         node->tbmres = tbmres = NULL;
180                         continue;
181                 }
182
183                 /*
184                  * Okay to fetch the tuple
185                  */
186                 targoffset = scan->rs_vistuples[scan->rs_cindex];
187                 dp = (Page) BufferGetPage(scan->rs_cbuf);
188                 lp = PageGetItemId(dp, targoffset);
189                 Assert(ItemIdIsNormal(lp));
190
191                 scan->rs_ctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
192                 scan->rs_ctup.t_len = ItemIdGetLength(lp);
193                 ItemPointerSet(&scan->rs_ctup.t_self, tbmres->blockno, targoffset);
194
195                 pgstat_count_heap_fetch(scan->rs_rd);
196
197                 /*
198                  * Set up the result slot to point to this tuple. Note that the slot
199                  * acquires a pin on the buffer.
200                  */
201                 ExecStoreTuple(&scan->rs_ctup,
202                                            slot,
203                                            scan->rs_cbuf,
204                                            false);
205
206                 /*
207                  * If we are using lossy info, we have to recheck the qual conditions
208                  * at every tuple.
209                  */
210                 if (tbmres->recheck)
211                 {
212                         econtext->ecxt_scantuple = slot;
213                         ResetExprContext(econtext);
214
215                         if (!ExecQual(node->bitmapqualorig, econtext, false))
216                         {
217                                 /* Fails recheck, so drop it and loop back for another */
218                                 ExecClearTuple(slot);
219                                 continue;
220                         }
221                 }
222
223                 /* OK to return this tuple */
224                 return slot;
225         }
226
227         /*
228          * if we get here it means we are at the end of the scan..
229          */
230         return ExecClearTuple(slot);
231 }
232
233 /*
234  * bitgetpage - subroutine for BitmapHeapNext()
235  *
236  * This routine reads and pins the specified page of the relation, then
237  * builds an array indicating which tuples on the page are both potentially
238  * interesting according to the bitmap, and visible according to the snapshot.
239  */
240 static void
241 bitgetpage(HeapScanDesc scan, TBMIterateResult *tbmres)
242 {
243         BlockNumber page = tbmres->blockno;
244         Buffer          buffer;
245         Snapshot        snapshot;
246         int                     ntup;
247
248         /*
249          * Acquire pin on the target heap page, trading in any pin we held before.
250          */
251         Assert(page < scan->rs_nblocks);
252
253         scan->rs_cbuf = ReleaseAndReadBuffer(scan->rs_cbuf,
254                                                                                  scan->rs_rd,
255                                                                                  page);
256         buffer = scan->rs_cbuf;
257         snapshot = scan->rs_snapshot;
258
259         ntup = 0;
260
261         /*
262          * Prune and repair fragmentation for the whole page, if possible.
263          */
264         heap_page_prune_opt(scan->rs_rd, buffer, RecentGlobalXmin);
265
266         /*
267          * We must hold share lock on the buffer content while examining tuple
268          * visibility.  Afterwards, however, the tuples we have found to be
269          * visible are guaranteed good as long as we hold the buffer pin.
270          */
271         LockBuffer(buffer, BUFFER_LOCK_SHARE);
272
273         /*
274          * We need two separate strategies for lossy and non-lossy cases.
275          */
276         if (tbmres->ntuples >= 0)
277         {
278                 /*
279                  * Bitmap is non-lossy, so we just look through the offsets listed in
280                  * tbmres; but we have to follow any HOT chain starting at each such
281                  * offset.
282                  */
283                 int                     curslot;
284
285                 for (curslot = 0; curslot < tbmres->ntuples; curslot++)
286                 {
287                         OffsetNumber offnum = tbmres->offsets[curslot];
288                         ItemPointerData tid;
289
290                         ItemPointerSet(&tid, page, offnum);
291                         if (heap_hot_search_buffer(&tid, buffer, snapshot, NULL))
292                                 scan->rs_vistuples[ntup++] = ItemPointerGetOffsetNumber(&tid);
293                 }
294         }
295         else
296         {
297                 /*
298                  * Bitmap is lossy, so we must examine each item pointer on the page.
299                  * But we can ignore HOT chains, since we'll check each tuple anyway.
300                  */
301                 Page            dp = (Page) BufferGetPage(buffer);
302                 OffsetNumber maxoff = PageGetMaxOffsetNumber(dp);
303                 OffsetNumber offnum;
304
305                 for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum = OffsetNumberNext(offnum))
306                 {
307                         ItemId          lp;
308                         HeapTupleData loctup;
309
310                         lp = PageGetItemId(dp, offnum);
311                         if (!ItemIdIsNormal(lp))
312                                 continue;
313                         loctup.t_data = (HeapTupleHeader) PageGetItem((Page) dp, lp);
314                         loctup.t_len = ItemIdGetLength(lp);
315                         if (HeapTupleSatisfiesVisibility(&loctup, snapshot, buffer))
316                                 scan->rs_vistuples[ntup++] = offnum;
317                 }
318         }
319
320         LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
321
322         Assert(ntup <= MaxHeapTuplesPerPage);
323         scan->rs_ntuples = ntup;
324 }
325
326 /* ----------------------------------------------------------------
327  *              ExecBitmapHeapScan(node)
328  * ----------------------------------------------------------------
329  */
330 TupleTableSlot *
331 ExecBitmapHeapScan(BitmapHeapScanState *node)
332 {
333         /*
334          * use BitmapHeapNext as access method
335          */
336         return ExecScan(&node->ss, (ExecScanAccessMtd) BitmapHeapNext);
337 }
338
339 /* ----------------------------------------------------------------
340  *              ExecBitmapHeapReScan(node)
341  * ----------------------------------------------------------------
342  */
343 void
344 ExecBitmapHeapReScan(BitmapHeapScanState *node, ExprContext *exprCtxt)
345 {
346         EState     *estate;
347         Index           scanrelid;
348
349         estate = node->ss.ps.state;
350         scanrelid = ((BitmapHeapScan *) node->ss.ps.plan)->scan.scanrelid;
351
352         node->ss.ps.ps_TupFromTlist = false;
353
354         /*
355          * If we are being passed an outer tuple, link it into the "regular"
356          * per-tuple econtext for possible qual eval.
357          */
358         if (exprCtxt != NULL)
359         {
360                 ExprContext *stdecontext;
361
362                 stdecontext = node->ss.ps.ps_ExprContext;
363                 stdecontext->ecxt_outertuple = exprCtxt->ecxt_outertuple;
364         }
365
366         /* If this is re-scanning of PlanQual ... */
367         if (estate->es_evTuple != NULL &&
368                 estate->es_evTuple[scanrelid - 1] != NULL)
369         {
370                 estate->es_evTupleNull[scanrelid - 1] = false;
371         }
372
373         /* rescan to release any page pin */
374         heap_rescan(node->ss.ss_currentScanDesc, NULL);
375
376         if (node->tbm)
377                 tbm_free(node->tbm);
378         node->tbm = NULL;
379         node->tbmres = NULL;
380
381         /*
382          * Always rescan the input immediately, to ensure we can pass down any
383          * outer tuple that might be used in index quals.
384          */
385         ExecReScan(outerPlanState(node), exprCtxt);
386 }
387
388 /* ----------------------------------------------------------------
389  *              ExecEndBitmapHeapScan
390  * ----------------------------------------------------------------
391  */
392 void
393 ExecEndBitmapHeapScan(BitmapHeapScanState *node)
394 {
395         Relation        relation;
396         HeapScanDesc scanDesc;
397
398         /*
399          * extract information from the node
400          */
401         relation = node->ss.ss_currentRelation;
402         scanDesc = node->ss.ss_currentScanDesc;
403
404         /*
405          * Free the exprcontext
406          */
407         ExecFreeExprContext(&node->ss.ps);
408
409         /*
410          * clear out tuple table slots
411          */
412         ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
413         ExecClearTuple(node->ss.ss_ScanTupleSlot);
414
415         /*
416          * close down subplans
417          */
418         ExecEndNode(outerPlanState(node));
419
420         /*
421          * release bitmap if any
422          */
423         if (node->tbm)
424                 tbm_free(node->tbm);
425
426         /*
427          * close heap scan
428          */
429         heap_endscan(scanDesc);
430
431         /*
432          * close the heap relation.
433          */
434         ExecCloseScanRelation(relation);
435 }
436
437 /* ----------------------------------------------------------------
438  *              ExecInitBitmapHeapScan
439  *
440  *              Initializes the scan's state information.
441  * ----------------------------------------------------------------
442  */
443 BitmapHeapScanState *
444 ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
445 {
446         BitmapHeapScanState *scanstate;
447         Relation        currentRelation;
448
449         /* check for unsupported flags */
450         Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
451
452         /*
453          * Assert caller didn't ask for an unsafe snapshot --- see comments at
454          * head of file.
455          */
456         Assert(IsMVCCSnapshot(estate->es_snapshot));
457
458         /*
459          * create state structure
460          */
461         scanstate = makeNode(BitmapHeapScanState);
462         scanstate->ss.ps.plan = (Plan *) node;
463         scanstate->ss.ps.state = estate;
464
465         scanstate->tbm = NULL;
466         scanstate->tbmres = NULL;
467
468         /*
469          * Miscellaneous initialization
470          *
471          * create expression context for node
472          */
473         ExecAssignExprContext(estate, &scanstate->ss.ps);
474
475         scanstate->ss.ps.ps_TupFromTlist = false;
476
477         /*
478          * initialize child expressions
479          */
480         scanstate->ss.ps.targetlist = (List *)
481                 ExecInitExpr((Expr *) node->scan.plan.targetlist,
482                                          (PlanState *) scanstate);
483         scanstate->ss.ps.qual = (List *)
484                 ExecInitExpr((Expr *) node->scan.plan.qual,
485                                          (PlanState *) scanstate);
486         scanstate->bitmapqualorig = (List *)
487                 ExecInitExpr((Expr *) node->bitmapqualorig,
488                                          (PlanState *) scanstate);
489
490 #define BITMAPHEAPSCAN_NSLOTS 2
491
492         /*
493          * tuple table initialization
494          */
495         ExecInitResultTupleSlot(estate, &scanstate->ss.ps);
496         ExecInitScanTupleSlot(estate, &scanstate->ss);
497
498         /*
499          * open the base relation and acquire appropriate lock on it.
500          */
501         currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid);
502
503         scanstate->ss.ss_currentRelation = currentRelation;
504
505         /*
506          * Even though we aren't going to do a conventional seqscan, it is useful
507          * to create a HeapScanDesc --- most of the fields in it are usable.
508          */
509         scanstate->ss.ss_currentScanDesc = heap_beginscan_bm(currentRelation,
510                                                                                                                  estate->es_snapshot,
511                                                                                                                  0,
512                                                                                                                  NULL);
513
514         /*
515          * get the scan type from the relation descriptor.
516          */
517         ExecAssignScanType(&scanstate->ss, RelationGetDescr(currentRelation));
518
519         /*
520          * Initialize result tuple type and projection info.
521          */
522         ExecAssignResultTypeFromTL(&scanstate->ss.ps);
523         ExecAssignScanProjectionInfo(&scanstate->ss);
524
525         /*
526          * initialize child nodes
527          *
528          * We do this last because the child nodes will open indexscans on our
529          * relation's indexes, and we want to be sure we have acquired a lock on
530          * the relation first.
531          */
532         outerPlanState(scanstate) = ExecInitNode(outerPlan(node), estate, eflags);
533
534         /*
535          * all done.
536          */
537         return scanstate;
538 }
539
540 int
541 ExecCountSlotsBitmapHeapScan(BitmapHeapScan *node)
542 {
543         return ExecCountSlotsNode(outerPlan((Plan *) node)) +
544                 ExecCountSlotsNode(innerPlan((Plan *) node)) + BITMAPHEAPSCAN_NSLOTS;
545 }