OSDN Git Service

Re-run pgindent, fixing a problem where comment lines after a blank
[pg-rex/syncrep.git] / src / backend / executor / nodeHashjoin.c
1 /*-------------------------------------------------------------------------
2  *
3  * nodeHashjoin.c
4  *        Routines to handle hash join nodes
5  *
6  * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *        $PostgreSQL: pgsql/src/backend/executor/nodeHashjoin.c,v 1.77 2005/11/22 18:17:10 momjian Exp $
12  *
13  *-------------------------------------------------------------------------
14  */
15
16 #include "postgres.h"
17
18 #include "executor/executor.h"
19 #include "executor/hashjoin.h"
20 #include "executor/nodeHash.h"
21 #include "executor/nodeHashjoin.h"
22 #include "optimizer/clauses.h"
23 #include "utils/memutils.h"
24
25
26 static TupleTableSlot *ExecHashJoinOuterGetTuple(PlanState *outerNode,
27                                                   HashJoinState *hjstate,
28                                                   uint32 *hashvalue);
29 static TupleTableSlot *ExecHashJoinGetSavedTuple(HashJoinState *hjstate,
30                                                   BufFile *file,
31                                                   uint32 *hashvalue,
32                                                   TupleTableSlot *tupleSlot);
33 static int      ExecHashJoinNewBatch(HashJoinState *hjstate);
34
35
36 /* ----------------------------------------------------------------
37  *              ExecHashJoin
38  *
39  *              This function implements the Hybrid Hashjoin algorithm.
40  *
41  *              Note: the relation we build hash table on is the "inner"
42  *                        the other one is "outer".
43  * ----------------------------------------------------------------
44  */
45 TupleTableSlot *                                /* return: a tuple or NULL */
46 ExecHashJoin(HashJoinState *node)
47 {
48         EState     *estate;
49         PlanState  *outerNode;
50         HashState  *hashNode;
51         List       *joinqual;
52         List       *otherqual;
53         TupleTableSlot *inntuple;
54         ExprContext *econtext;
55         ExprDoneCond isDone;
56         HashJoinTable hashtable;
57         HeapTuple       curtuple;
58         TupleTableSlot *outerTupleSlot;
59         uint32          hashvalue;
60         int                     batchno;
61
62         /*
63          * get information from HashJoin node
64          */
65         estate = node->js.ps.state;
66         joinqual = node->js.joinqual;
67         otherqual = node->js.ps.qual;
68         hashNode = (HashState *) innerPlanState(node);
69         outerNode = outerPlanState(node);
70
71         /*
72          * get information from HashJoin state
73          */
74         hashtable = node->hj_HashTable;
75         econtext = node->js.ps.ps_ExprContext;
76
77         /*
78          * Check to see if we're still projecting out tuples from a previous join
79          * tuple (because there is a function-returning-set in the projection
80          * expressions).  If so, try to project another one.
81          */
82         if (node->js.ps.ps_TupFromTlist)
83         {
84                 TupleTableSlot *result;
85
86                 result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
87                 if (isDone == ExprMultipleResult)
88                         return result;
89                 /* Done with that source tuple... */
90                 node->js.ps.ps_TupFromTlist = false;
91         }
92
93         /*
94          * If we're doing an IN join, we want to return at most one row per outer
95          * tuple; so we can stop scanning the inner scan if we matched on the
96          * previous try.
97          */
98         if (node->js.jointype == JOIN_IN && node->hj_MatchedOuter)
99                 node->hj_NeedNewOuter = true;
100
101         /*
102          * Reset per-tuple memory context to free any expression evaluation
103          * storage allocated in the previous tuple cycle.  Note this can't happen
104          * until we're done projecting out tuples from a join tuple.
105          */
106         ResetExprContext(econtext);
107
108         /*
109          * if this is the first call, build the hash table for inner relation
110          */
111         if (hashtable == NULL)
112         {
113                 /*
114                  * If the outer relation is completely empty, we can quit without
115                  * building the hash table.  However, for an inner join it is only a
116                  * win to check this when the outer relation's startup cost is less
117                  * than the projected cost of building the hash table.  Otherwise it's
118                  * best to build the hash table first and see if the inner relation is
119                  * empty.  (When it's an outer join, we should always make this check,
120                  * since we aren't going to be able to skip the join on the strength
121                  * of an empty inner relation anyway.)
122                  *
123                  * The only way to make the check is to try to fetch a tuple from the
124                  * outer plan node.  If we succeed, we have to stash it away for later
125                  * consumption by ExecHashJoinOuterGetTuple.
126                  */
127                 if (outerNode->plan->startup_cost < hashNode->ps.plan->total_cost ||
128                         node->js.jointype == JOIN_LEFT)
129                 {
130                         node->hj_FirstOuterTupleSlot = ExecProcNode(outerNode);
131                         if (TupIsNull(node->hj_FirstOuterTupleSlot))
132                                 return NULL;
133                 }
134                 else
135                         node->hj_FirstOuterTupleSlot = NULL;
136
137                 /*
138                  * create the hash table
139                  */
140                 hashtable = ExecHashTableCreate((Hash *) hashNode->ps.plan,
141                                                                                 node->hj_HashOperators);
142                 node->hj_HashTable = hashtable;
143
144                 /*
145                  * execute the Hash node, to build the hash table
146                  */
147                 hashNode->hashtable = hashtable;
148                 (void) MultiExecProcNode((PlanState *) hashNode);
149
150                 /*
151                  * If the inner relation is completely empty, and we're not doing an
152                  * outer join, we can quit without scanning the outer relation.
153                  */
154                 if (hashtable->totalTuples == 0 && node->js.jointype != JOIN_LEFT)
155                 {
156                         ExecHashTableDestroy(hashtable);
157                         node->hj_HashTable = NULL;
158                         node->hj_FirstOuterTupleSlot = NULL;
159                         return NULL;
160                 }
161
162                 /*
163                  * need to remember whether nbatch has increased since we began
164                  * scanning the outer relation
165                  */
166                 hashtable->nbatch_outstart = hashtable->nbatch;
167         }
168
169         /*
170          * run the hash join process
171          */
172         for (;;)
173         {
174                 /*
175                  * If we don't have an outer tuple, get the next one
176                  */
177                 if (node->hj_NeedNewOuter)
178                 {
179                         outerTupleSlot = ExecHashJoinOuterGetTuple(outerNode,
180                                                                                                            node,
181                                                                                                            &hashvalue);
182                         if (TupIsNull(outerTupleSlot))
183                         {
184                                 /* end of join */
185                                 return NULL;
186                         }
187
188                         node->js.ps.ps_OuterTupleSlot = outerTupleSlot;
189                         econtext->ecxt_outertuple = outerTupleSlot;
190                         node->hj_NeedNewOuter = false;
191                         node->hj_MatchedOuter = false;
192
193                         /*
194                          * now we have an outer tuple, find the corresponding bucket for
195                          * this tuple from the hash table
196                          */
197                         node->hj_CurHashValue = hashvalue;
198                         ExecHashGetBucketAndBatch(hashtable, hashvalue,
199                                                                           &node->hj_CurBucketNo, &batchno);
200                         node->hj_CurTuple = NULL;
201
202                         /*
203                          * Now we've got an outer tuple and the corresponding hash bucket,
204                          * but this tuple may not belong to the current batch.
205                          */
206                         if (batchno != hashtable->curbatch)
207                         {
208                                 /*
209                                  * Need to postpone this outer tuple to a later batch. Save it
210                                  * in the corresponding outer-batch file.
211                                  */
212                                 Assert(batchno > hashtable->curbatch);
213                                 ExecHashJoinSaveTuple(ExecFetchSlotTuple(outerTupleSlot),
214                                                                           hashvalue,
215                                                                           &hashtable->outerBatchFile[batchno]);
216                                 node->hj_NeedNewOuter = true;
217                                 continue;               /* loop around for a new outer tuple */
218                         }
219                 }
220
221                 /*
222                  * OK, scan the selected hash bucket for matches
223                  */
224                 for (;;)
225                 {
226                         curtuple = ExecScanHashBucket(node, econtext);
227                         if (curtuple == NULL)
228                                 break;                  /* out of matches */
229
230                         /*
231                          * we've got a match, but still need to test non-hashed quals
232                          */
233                         inntuple = ExecStoreTuple(curtuple,
234                                                                           node->hj_HashTupleSlot,
235                                                                           InvalidBuffer,
236                                                                           false);       /* don't pfree this tuple */
237                         econtext->ecxt_innertuple = inntuple;
238
239                         /* reset temp memory each time to avoid leaks from qual expr */
240                         ResetExprContext(econtext);
241
242                         /*
243                          * if we pass the qual, then save state for next call and have
244                          * ExecProject form the projection, store it in the tuple table,
245                          * and return the slot.
246                          *
247                          * Only the joinquals determine MatchedOuter status, but all quals
248                          * must pass to actually return the tuple.
249                          */
250                         if (joinqual == NIL || ExecQual(joinqual, econtext, false))
251                         {
252                                 node->hj_MatchedOuter = true;
253
254                                 if (otherqual == NIL || ExecQual(otherqual, econtext, false))
255                                 {
256                                         TupleTableSlot *result;
257
258                                         result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
259
260                                         if (isDone != ExprEndResult)
261                                         {
262                                                 node->js.ps.ps_TupFromTlist =
263                                                         (isDone == ExprMultipleResult);
264                                                 return result;
265                                         }
266                                 }
267
268                                 /*
269                                  * If we didn't return a tuple, may need to set NeedNewOuter
270                                  */
271                                 if (node->js.jointype == JOIN_IN)
272                                 {
273                                         node->hj_NeedNewOuter = true;
274                                         break;          /* out of loop over hash bucket */
275                                 }
276                         }
277                 }
278
279                 /*
280                  * Now the current outer tuple has run out of matches, so check
281                  * whether to emit a dummy outer-join tuple. If not, loop around to
282                  * get a new outer tuple.
283                  */
284                 node->hj_NeedNewOuter = true;
285
286                 if (!node->hj_MatchedOuter &&
287                         node->js.jointype == JOIN_LEFT)
288                 {
289                         /*
290                          * We are doing an outer join and there were no join matches for
291                          * this outer tuple.  Generate a fake join tuple with nulls for
292                          * the inner tuple, and return it if it passes the non-join quals.
293                          */
294                         econtext->ecxt_innertuple = node->hj_NullInnerTupleSlot;
295
296                         if (ExecQual(otherqual, econtext, false))
297                         {
298                                 /*
299                                  * qualification was satisfied so we project and return the
300                                  * slot containing the result tuple using ExecProject().
301                                  */
302                                 TupleTableSlot *result;
303
304                                 result = ExecProject(node->js.ps.ps_ProjInfo, &isDone);
305
306                                 if (isDone != ExprEndResult)
307                                 {
308                                         node->js.ps.ps_TupFromTlist =
309                                                 (isDone == ExprMultipleResult);
310                                         return result;
311                                 }
312                         }
313                 }
314         }
315 }
316
317 /* ----------------------------------------------------------------
318  *              ExecInitHashJoin
319  *
320  *              Init routine for HashJoin node.
321  * ----------------------------------------------------------------
322  */
323 HashJoinState *
324 ExecInitHashJoin(HashJoin *node, EState *estate)
325 {
326         HashJoinState *hjstate;
327         Plan       *outerNode;
328         Hash       *hashNode;
329         List       *lclauses;
330         List       *rclauses;
331         List       *hoperators;
332         ListCell   *l;
333
334         /*
335          * create state structure
336          */
337         hjstate = makeNode(HashJoinState);
338         hjstate->js.ps.plan = (Plan *) node;
339         hjstate->js.ps.state = estate;
340
341         /*
342          * Miscellaneous initialization
343          *
344          * create expression context for node
345          */
346         ExecAssignExprContext(estate, &hjstate->js.ps);
347
348         /*
349          * initialize child expressions
350          */
351         hjstate->js.ps.targetlist = (List *)
352                 ExecInitExpr((Expr *) node->join.plan.targetlist,
353                                          (PlanState *) hjstate);
354         hjstate->js.ps.qual = (List *)
355                 ExecInitExpr((Expr *) node->join.plan.qual,
356                                          (PlanState *) hjstate);
357         hjstate->js.jointype = node->join.jointype;
358         hjstate->js.joinqual = (List *)
359                 ExecInitExpr((Expr *) node->join.joinqual,
360                                          (PlanState *) hjstate);
361         hjstate->hashclauses = (List *)
362                 ExecInitExpr((Expr *) node->hashclauses,
363                                          (PlanState *) hjstate);
364
365         /*
366          * initialize child nodes
367          */
368         outerNode = outerPlan(node);
369         hashNode = (Hash *) innerPlan(node);
370
371         outerPlanState(hjstate) = ExecInitNode(outerNode, estate);
372         innerPlanState(hjstate) = ExecInitNode((Plan *) hashNode, estate);
373
374 #define HASHJOIN_NSLOTS 3
375
376         /*
377          * tuple table initialization
378          */
379         ExecInitResultTupleSlot(estate, &hjstate->js.ps);
380         hjstate->hj_OuterTupleSlot = ExecInitExtraTupleSlot(estate);
381
382         switch (node->join.jointype)
383         {
384                 case JOIN_INNER:
385                 case JOIN_IN:
386                         break;
387                 case JOIN_LEFT:
388                         hjstate->hj_NullInnerTupleSlot =
389                                 ExecInitNullTupleSlot(estate,
390                                                                  ExecGetResultType(innerPlanState(hjstate)));
391                         break;
392                 default:
393                         elog(ERROR, "unrecognized join type: %d",
394                                  (int) node->join.jointype);
395         }
396
397         /*
398          * now for some voodoo.  our temporary tuple slot is actually the result
399          * tuple slot of the Hash node (which is our inner plan).  we do this
400          * because Hash nodes don't return tuples via ExecProcNode() -- instead
401          * the hash join node uses ExecScanHashBucket() to get at the contents of
402          * the hash table.      -cim 6/9/91
403          */
404         {
405                 HashState  *hashstate = (HashState *) innerPlanState(hjstate);
406                 TupleTableSlot *slot = hashstate->ps.ps_ResultTupleSlot;
407
408                 hjstate->hj_HashTupleSlot = slot;
409         }
410
411         /*
412          * initialize tuple type and projection info
413          */
414         ExecAssignResultTypeFromTL(&hjstate->js.ps);
415         ExecAssignProjectionInfo(&hjstate->js.ps);
416
417         ExecSetSlotDescriptor(hjstate->hj_OuterTupleSlot,
418                                                   ExecGetResultType(outerPlanState(hjstate)),
419                                                   false);
420
421         /*
422          * initialize hash-specific info
423          */
424         hjstate->hj_HashTable = NULL;
425         hjstate->hj_FirstOuterTupleSlot = NULL;
426
427         hjstate->hj_CurHashValue = 0;
428         hjstate->hj_CurBucketNo = 0;
429         hjstate->hj_CurTuple = NULL;
430
431         /*
432          * Deconstruct the hash clauses into outer and inner argument values, so
433          * that we can evaluate those subexpressions separately.  Also make a list
434          * of the hash operator OIDs, in preparation for looking up the hash
435          * functions to use.
436          */
437         lclauses = NIL;
438         rclauses = NIL;
439         hoperators = NIL;
440         foreach(l, hjstate->hashclauses)
441         {
442                 FuncExprState *fstate = (FuncExprState *) lfirst(l);
443                 OpExpr     *hclause;
444
445                 Assert(IsA(fstate, FuncExprState));
446                 hclause = (OpExpr *) fstate->xprstate.expr;
447                 Assert(IsA(hclause, OpExpr));
448                 lclauses = lappend(lclauses, linitial(fstate->args));
449                 rclauses = lappend(rclauses, lsecond(fstate->args));
450                 hoperators = lappend_oid(hoperators, hclause->opno);
451         }
452         hjstate->hj_OuterHashKeys = lclauses;
453         hjstate->hj_InnerHashKeys = rclauses;
454         hjstate->hj_HashOperators = hoperators;
455         /* child Hash node needs to evaluate inner hash keys, too */
456         ((HashState *) innerPlanState(hjstate))->hashkeys = rclauses;
457
458         hjstate->js.ps.ps_OuterTupleSlot = NULL;
459         hjstate->js.ps.ps_TupFromTlist = false;
460         hjstate->hj_NeedNewOuter = true;
461         hjstate->hj_MatchedOuter = false;
462
463         return hjstate;
464 }
465
466 int
467 ExecCountSlotsHashJoin(HashJoin *node)
468 {
469         return ExecCountSlotsNode(outerPlan(node)) +
470                 ExecCountSlotsNode(innerPlan(node)) +
471                 HASHJOIN_NSLOTS;
472 }
473
474 /* ----------------------------------------------------------------
475  *              ExecEndHashJoin
476  *
477  *              clean up routine for HashJoin node
478  * ----------------------------------------------------------------
479  */
480 void
481 ExecEndHashJoin(HashJoinState *node)
482 {
483         /*
484          * Free hash table
485          */
486         if (node->hj_HashTable)
487         {
488                 ExecHashTableDestroy(node->hj_HashTable);
489                 node->hj_HashTable = NULL;
490                 node->hj_FirstOuterTupleSlot = NULL;
491         }
492
493         /*
494          * Free the exprcontext
495          */
496         ExecFreeExprContext(&node->js.ps);
497
498         /*
499          * clean out the tuple table
500          */
501         ExecClearTuple(node->js.ps.ps_ResultTupleSlot);
502         ExecClearTuple(node->hj_OuterTupleSlot);
503         ExecClearTuple(node->hj_HashTupleSlot);
504
505         /*
506          * clean up subtrees
507          */
508         ExecEndNode(outerPlanState(node));
509         ExecEndNode(innerPlanState(node));
510 }
511
512 /*
513  * ExecHashJoinOuterGetTuple
514  *
515  *              get the next outer tuple for hashjoin: either by
516  *              executing a plan node in the first pass, or from
517  *              the temp files for the hashjoin batches.
518  *
519  * Returns a null slot if no more outer tuples.  On success, the tuple's
520  * hash value is stored at *hashvalue --- this is either originally computed,
521  * or re-read from the temp file.
522  */
523 static TupleTableSlot *
524 ExecHashJoinOuterGetTuple(PlanState *outerNode,
525                                                   HashJoinState *hjstate,
526                                                   uint32 *hashvalue)
527 {
528         HashJoinTable hashtable = hjstate->hj_HashTable;
529         int                     curbatch = hashtable->curbatch;
530         TupleTableSlot *slot;
531
532         if (curbatch == 0)
533         {                                                       /* if it is the first pass */
534
535                 /*
536                  * Check to see if first outer tuple was already fetched by
537                  * ExecHashJoin() and not used yet.
538                  */
539                 slot = hjstate->hj_FirstOuterTupleSlot;
540                 if (!TupIsNull(slot))
541                         hjstate->hj_FirstOuterTupleSlot = NULL;
542                 else
543                         slot = ExecProcNode(outerNode);
544                 if (!TupIsNull(slot))
545                 {
546                         /*
547                          * We have to compute the tuple's hash value.
548                          */
549                         ExprContext *econtext = hjstate->js.ps.ps_ExprContext;
550
551                         econtext->ecxt_outertuple = slot;
552                         *hashvalue = ExecHashGetHashValue(hashtable, econtext,
553                                                                                           hjstate->hj_OuterHashKeys);
554
555                         return slot;
556                 }
557
558                 /*
559                  * We have just reached the end of the first pass. Try to switch to a
560                  * saved batch.
561                  */
562                 curbatch = ExecHashJoinNewBatch(hjstate);
563         }
564
565         /*
566          * Try to read from a temp file. Loop allows us to advance to new batches
567          * as needed.  NOTE: nbatch could increase inside ExecHashJoinNewBatch, so
568          * don't try to optimize this loop.
569          */
570         while (curbatch < hashtable->nbatch)
571         {
572                 slot = ExecHashJoinGetSavedTuple(hjstate,
573                                                                                  hashtable->outerBatchFile[curbatch],
574                                                                                  hashvalue,
575                                                                                  hjstate->hj_OuterTupleSlot);
576                 if (!TupIsNull(slot))
577                         return slot;
578                 curbatch = ExecHashJoinNewBatch(hjstate);
579         }
580
581         /* Out of batches... */
582         return NULL;
583 }
584
585 /*
586  * ExecHashJoinNewBatch
587  *              switch to a new hashjoin batch
588  *
589  * Returns the number of the new batch (1..nbatch-1), or nbatch if no more.
590  * We will never return a batch number that has an empty outer batch file.
591  */
592 static int
593 ExecHashJoinNewBatch(HashJoinState *hjstate)
594 {
595         HashJoinTable hashtable = hjstate->hj_HashTable;
596         int                     nbatch;
597         int                     curbatch;
598         BufFile    *innerFile;
599         TupleTableSlot *slot;
600         uint32          hashvalue;
601
602 start_over:
603         nbatch = hashtable->nbatch;
604         curbatch = hashtable->curbatch;
605
606         if (curbatch > 0)
607         {
608                 /*
609                  * We no longer need the previous outer batch file; close it right
610                  * away to free disk space.
611                  */
612                 if (hashtable->outerBatchFile[curbatch])
613                         BufFileClose(hashtable->outerBatchFile[curbatch]);
614                 hashtable->outerBatchFile[curbatch] = NULL;
615         }
616
617         /*
618          * We can always skip over any batches that are completely empty on both
619          * sides.  We can sometimes skip over batches that are empty on only one
620          * side, but there are exceptions:
621          *
622          * 1. In a LEFT JOIN, we have to process outer batches even if the inner
623          * batch is empty.
624          *
625          * 2. If we have increased nbatch since the initial estimate, we have to
626          * scan inner batches since they might contain tuples that need to be
627          * reassigned to later inner batches.
628          *
629          * 3. Similarly, if we have increased nbatch since starting the outer
630          * scan, we have to rescan outer batches in case they contain tuples that
631          * need to be reassigned.
632          */
633         curbatch++;
634         while (curbatch < nbatch &&
635                    (hashtable->outerBatchFile[curbatch] == NULL ||
636                         hashtable->innerBatchFile[curbatch] == NULL))
637         {
638                 if (hashtable->outerBatchFile[curbatch] &&
639                         hjstate->js.jointype == JOIN_LEFT)
640                         break;                          /* must process due to rule 1 */
641                 if (hashtable->innerBatchFile[curbatch] &&
642                         nbatch != hashtable->nbatch_original)
643                         break;                          /* must process due to rule 2 */
644                 if (hashtable->outerBatchFile[curbatch] &&
645                         nbatch != hashtable->nbatch_outstart)
646                         break;                          /* must process due to rule 3 */
647                 /* We can ignore this batch. */
648                 /* Release associated temp files right away. */
649                 if (hashtable->innerBatchFile[curbatch])
650                         BufFileClose(hashtable->innerBatchFile[curbatch]);
651                 hashtable->innerBatchFile[curbatch] = NULL;
652                 if (hashtable->outerBatchFile[curbatch])
653                         BufFileClose(hashtable->outerBatchFile[curbatch]);
654                 hashtable->outerBatchFile[curbatch] = NULL;
655                 curbatch++;
656         }
657
658         if (curbatch >= nbatch)
659                 return curbatch;                /* no more batches */
660
661         hashtable->curbatch = curbatch;
662
663         /*
664          * Reload the hash table with the new inner batch (which could be empty)
665          */
666         ExecHashTableReset(hashtable);
667
668         innerFile = hashtable->innerBatchFile[curbatch];
669
670         if (innerFile != NULL)
671         {
672                 if (BufFileSeek(innerFile, 0, 0L, SEEK_SET))
673                         ereport(ERROR,
674                                         (errcode_for_file_access(),
675                                    errmsg("could not rewind hash-join temporary file: %m")));
676
677                 while ((slot = ExecHashJoinGetSavedTuple(hjstate,
678                                                                                                  innerFile,
679                                                                                                  &hashvalue,
680                                                                                                  hjstate->hj_HashTupleSlot)))
681                 {
682                         /*
683                          * NOTE: some tuples may be sent to future batches.  Also, it is
684                          * possible for hashtable->nbatch to be increased here!
685                          */
686                         ExecHashTableInsert(hashtable,
687                                                                 ExecFetchSlotTuple(slot),
688                                                                 hashvalue);
689                 }
690
691                 /*
692                  * after we build the hash table, the inner batch file is no longer
693                  * needed
694                  */
695                 BufFileClose(innerFile);
696                 hashtable->innerBatchFile[curbatch] = NULL;
697         }
698
699         /*
700          * If there's no outer batch file, advance to next batch.
701          */
702         if (hashtable->outerBatchFile[curbatch] == NULL)
703                 goto start_over;
704
705         /*
706          * Rewind outer batch file, so that we can start reading it.
707          */
708         if (BufFileSeek(hashtable->outerBatchFile[curbatch], 0, 0L, SEEK_SET))
709                 ereport(ERROR,
710                                 (errcode_for_file_access(),
711                                  errmsg("could not rewind hash-join temporary file: %m")));
712
713         return curbatch;
714 }
715
716 /*
717  * ExecHashJoinSaveTuple
718  *              save a tuple to a batch file.
719  *
720  * The data recorded in the file for each tuple is its hash value,
721  * then an image of its HeapTupleData (with meaningless t_data pointer)
722  * followed by the HeapTupleHeader and tuple data.
723  *
724  * Note: it is important always to call this in the regular executor
725  * context, not in a shorter-lived context; else the temp file buffers
726  * will get messed up.
727  */
728 void
729 ExecHashJoinSaveTuple(HeapTuple heapTuple, uint32 hashvalue,
730                                           BufFile **fileptr)
731 {
732         BufFile    *file = *fileptr;
733         size_t          written;
734
735         if (file == NULL)
736         {
737                 /* First write to this batch file, so open it. */
738                 file = BufFileCreateTemp(false);
739                 *fileptr = file;
740         }
741
742         written = BufFileWrite(file, (void *) &hashvalue, sizeof(uint32));
743         if (written != sizeof(uint32))
744                 ereport(ERROR,
745                                 (errcode_for_file_access(),
746                                  errmsg("could not write to hash-join temporary file: %m")));
747
748         written = BufFileWrite(file, (void *) heapTuple, sizeof(HeapTupleData));
749         if (written != sizeof(HeapTupleData))
750                 ereport(ERROR,
751                                 (errcode_for_file_access(),
752                                  errmsg("could not write to hash-join temporary file: %m")));
753
754         written = BufFileWrite(file, (void *) heapTuple->t_data, heapTuple->t_len);
755         if (written != (size_t) heapTuple->t_len)
756                 ereport(ERROR,
757                                 (errcode_for_file_access(),
758                                  errmsg("could not write to hash-join temporary file: %m")));
759 }
760
761 /*
762  * ExecHashJoinGetSavedTuple
763  *              read the next tuple from a batch file.  Return NULL if no more.
764  *
765  * On success, *hashvalue is set to the tuple's hash value, and the tuple
766  * itself is stored in the given slot.
767  */
768 static TupleTableSlot *
769 ExecHashJoinGetSavedTuple(HashJoinState *hjstate,
770                                                   BufFile *file,
771                                                   uint32 *hashvalue,
772                                                   TupleTableSlot *tupleSlot)
773 {
774         HeapTupleData htup;
775         size_t          nread;
776         HeapTuple       heapTuple;
777
778         nread = BufFileRead(file, (void *) hashvalue, sizeof(uint32));
779         if (nread == 0)
780                 return NULL;                    /* end of file */
781         if (nread != sizeof(uint32))
782                 ereport(ERROR,
783                                 (errcode_for_file_access(),
784                                  errmsg("could not read from hash-join temporary file: %m")));
785         nread = BufFileRead(file, (void *) &htup, sizeof(HeapTupleData));
786         if (nread != sizeof(HeapTupleData))
787                 ereport(ERROR,
788                                 (errcode_for_file_access(),
789                                  errmsg("could not read from hash-join temporary file: %m")));
790         heapTuple = palloc(HEAPTUPLESIZE + htup.t_len);
791         memcpy((char *) heapTuple, (char *) &htup, sizeof(HeapTupleData));
792         heapTuple->t_data = (HeapTupleHeader)
793                 ((char *) heapTuple + HEAPTUPLESIZE);
794         nread = BufFileRead(file, (void *) heapTuple->t_data, htup.t_len);
795         if (nread != (size_t) htup.t_len)
796                 ereport(ERROR,
797                                 (errcode_for_file_access(),
798                                  errmsg("could not read from hash-join temporary file: %m")));
799         return ExecStoreTuple(heapTuple, tupleSlot, InvalidBuffer, true);
800 }
801
802
803 void
804 ExecReScanHashJoin(HashJoinState *node, ExprContext *exprCtxt)
805 {
806         /*
807          * If we haven't yet built the hash table then we can just return; nothing
808          * done yet, so nothing to undo.
809          */
810         if (node->hj_HashTable == NULL)
811                 return;
812
813         /*
814          * In a multi-batch join, we currently have to do rescans the hard way,
815          * primarily because batch temp files may have already been released. But
816          * if it's a single-batch join, and there is no parameter change for the
817          * inner subnode, then we can just re-use the existing hash table without
818          * rebuilding it.
819          */
820         if (node->hj_HashTable->nbatch == 1 &&
821                 ((PlanState *) node)->righttree->chgParam == NULL)
822         {
823                 /* okay to reuse the hash table; needn't rescan inner, either */
824         }
825         else
826         {
827                 /* must destroy and rebuild hash table */
828                 ExecHashTableDestroy(node->hj_HashTable);
829                 node->hj_HashTable = NULL;
830                 node->hj_FirstOuterTupleSlot = NULL;
831
832                 /*
833                  * if chgParam of subnode is not null then plan will be re-scanned by
834                  * first ExecProcNode.
835                  */
836                 if (((PlanState *) node)->righttree->chgParam == NULL)
837                         ExecReScan(((PlanState *) node)->righttree, exprCtxt);
838         }
839
840         /* Always reset intra-tuple state */
841         node->hj_CurHashValue = 0;
842         node->hj_CurBucketNo = 0;
843         node->hj_CurTuple = NULL;
844
845         node->js.ps.ps_OuterTupleSlot = NULL;
846         node->js.ps.ps_TupFromTlist = false;
847         node->hj_NeedNewOuter = true;
848         node->hj_MatchedOuter = false;
849
850         /*
851          * if chgParam of subnode is not null then plan will be re-scanned by
852          * first ExecProcNode.
853          */
854         if (((PlanState *) node)->lefttree->chgParam == NULL)
855                 ExecReScan(((PlanState *) node)->lefttree, exprCtxt);
856 }