From f39d57b83c823439ebb3b680f94eaf396f02520e Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 28 May 2010 01:14:03 +0000 Subject: [PATCH] Rejigger mergejoin logic so that a tuple with a null in the first merge column is treated like end-of-input, if nulls sort last in that column and we are not doing outer-join filling for that input. In such a case, the tuple cannot join to anything from the other input (because we assume mergejoinable operators are strict), and neither can any tuple following it in the sort order. If we're not interested in doing outer-join filling we can just pretend the tuple and its successors aren't there at all. This can save a great deal of time in situations where there are many nulls in the join column, as in a recent example from Scott Marlowe. Also, since the planner tends to not count nulls in its mergejoin scan selectivity estimates, this is an important fix to make the runtime behavior more like the estimate. I regard this as an omission in the patch I wrote years ago to teach mergejoin that tuples containing nulls aren't joinable, so I'm back-patching it. But only to 8.3 --- in older versions, we didn't have a solid notion of whether nulls sort high or low, so attempting to apply this optimization could break things. --- src/backend/executor/nodeMergejoin.c | 501 +++++++++++++++++++---------------- 1 file changed, 268 insertions(+), 233 deletions(-) diff --git a/src/backend/executor/nodeMergejoin.c b/src/backend/executor/nodeMergejoin.c index 8404c4d464..e3b736f907 100644 --- a/src/backend/executor/nodeMergejoin.c +++ b/src/backend/executor/nodeMergejoin.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/executor/nodeMergejoin.c,v 1.101 2010/02/26 02:00:42 momjian Exp $ + * $PostgreSQL: pgsql/src/backend/executor/nodeMergejoin.c,v 1.102 2010/05/28 01:14:03 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -131,6 +131,14 @@ typedef struct MergeJoinClauseData FmgrInfo cmpfinfo; } MergeJoinClauseData; +/* Result type for MJEvalOuterValues and MJEvalInnerValues */ +typedef enum +{ + MJEVAL_MATCHABLE, /* normal, potentially matchable tuple */ + MJEVAL_NONMATCHABLE, /* tuple cannot join because it has a null */ + MJEVAL_ENDOFJOIN /* end of input (physical or effective) */ +} MJEvalResult; + #define MarkInnerTuple(innerTupleSlot, mergestate) \ ExecCopySlot((mergestate)->mj_MarkedTupleSlot, (innerTupleSlot)) @@ -241,19 +249,34 @@ MJExamineQuals(List *mergeclauses, * Compute the values of the mergejoined expressions for the current * outer tuple. We also detect whether it's impossible for the current * outer tuple to match anything --- this is true if it yields a NULL - * input, since we assume mergejoin operators are strict. + * input, since we assume mergejoin operators are strict. If the NULL + * is in the first join column, and that column sorts nulls last, then + * we can further conclude that no following tuple can match anything + * either, since they must all have nulls in the first column. However, + * that case is only interesting if we're not in FillOuter mode, else + * we have to visit all the tuples anyway. + * + * For the convenience of callers, we also make this routine responsible + * for testing for end-of-input (null outer tuple), and returning + * MJEVAL_ENDOFJOIN when that's seen. This allows the same code to be used + * for both real end-of-input and the effective end-of-input represented by + * a first-column NULL. * * We evaluate the values in OuterEContext, which can be reset each * time we move to a new tuple. */ -static bool +static MJEvalResult MJEvalOuterValues(MergeJoinState *mergestate) { ExprContext *econtext = mergestate->mj_OuterEContext; - bool canmatch = true; + MJEvalResult result = MJEVAL_MATCHABLE; int i; MemoryContext oldContext; + /* Check for end of outer subplan */ + if (TupIsNull(mergestate->mj_OuterTupleSlot)) + return MJEVAL_ENDOFJOIN; + ResetExprContext(econtext); oldContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory); @@ -267,12 +290,18 @@ MJEvalOuterValues(MergeJoinState *mergestate) clause->ldatum = ExecEvalExpr(clause->lexpr, econtext, &clause->lisnull, NULL); if (clause->lisnull) - canmatch = false; + { + /* match is impossible; can we end the join early? */ + if (i == 0 && !clause->nulls_first && !mergestate->mj_FillOuter) + result = MJEVAL_ENDOFJOIN; + else if (result == MJEVAL_MATCHABLE) + result = MJEVAL_NONMATCHABLE; + } } MemoryContextSwitchTo(oldContext); - return canmatch; + return result; } /* @@ -282,14 +311,18 @@ MJEvalOuterValues(MergeJoinState *mergestate) * to load data from either the true current inner, or the marked inner, * so caller must tell us which slot to load from. */ -static bool +static MJEvalResult MJEvalInnerValues(MergeJoinState *mergestate, TupleTableSlot *innerslot) { ExprContext *econtext = mergestate->mj_InnerEContext; - bool canmatch = true; + MJEvalResult result = MJEVAL_MATCHABLE; int i; MemoryContext oldContext; + /* Check for end of inner subplan */ + if (TupIsNull(innerslot)) + return MJEVAL_ENDOFJOIN; + ResetExprContext(econtext); oldContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory); @@ -303,12 +336,18 @@ MJEvalInnerValues(MergeJoinState *mergestate, TupleTableSlot *innerslot) clause->rdatum = ExecEvalExpr(clause->rexpr, econtext, &clause->risnull, NULL); if (clause->risnull) - canmatch = false; + { + /* match is impossible; can we end the join early? */ + if (i == 0 && !clause->nulls_first && !mergestate->mj_FillInner) + result = MJEVAL_ENDOFJOIN; + else if (result == MJEVAL_MATCHABLE) + result = MJEVAL_NONMATCHABLE; + } } MemoryContextSwitchTo(oldContext); - return canmatch; + return result; } /* @@ -656,46 +695,46 @@ ExecMergeJoin(MergeJoinState *node) outerTupleSlot = ExecProcNode(outerPlan); node->mj_OuterTupleSlot = outerTupleSlot; - if (TupIsNull(outerTupleSlot)) - { - MJ_printf("ExecMergeJoin: nothing in outer subplan\n"); - if (doFillInner) - { - /* - * Need to emit right-join tuples for remaining inner - * tuples. We set MatchedInner = true to force the - * ENDOUTER state to advance inner. - */ - node->mj_JoinState = EXEC_MJ_ENDOUTER; - node->mj_MatchedInner = true; - break; - } - /* Otherwise we're done. */ - return NULL; - } /* Compute join values and check for unmatchability */ - if (MJEvalOuterValues(node)) - { - /* OK to go get the first inner tuple */ - node->mj_JoinState = EXEC_MJ_INITIALIZE_INNER; - } - else + switch (MJEvalOuterValues(node)) { - /* Stay in same state to fetch next outer tuple */ - if (doFillOuter) - { - /* - * Generate a fake join tuple with nulls for the inner - * tuple, and return it if it passes the non-join - * quals. - */ - TupleTableSlot *result; + case MJEVAL_MATCHABLE: + /* OK to go get the first inner tuple */ + node->mj_JoinState = EXEC_MJ_INITIALIZE_INNER; + break; + case MJEVAL_NONMATCHABLE: + /* Stay in same state to fetch next outer tuple */ + if (doFillOuter) + { + /* + * Generate a fake join tuple with nulls for the + * inner tuple, and return it if it passes the + * non-join quals. + */ + TupleTableSlot *result; - result = MJFillOuter(node); - if (result) - return result; - } + result = MJFillOuter(node); + if (result) + return result; + } + break; + case MJEVAL_ENDOFJOIN: + /* No more outer tuples */ + MJ_printf("ExecMergeJoin: nothing in outer subplan\n"); + if (doFillInner) + { + /* + * Need to emit right-join tuples for remaining + * inner tuples. We set MatchedInner = true to + * force the ENDOUTER state to advance inner. + */ + node->mj_JoinState = EXEC_MJ_ENDOUTER; + node->mj_MatchedInner = true; + break; + } + /* Otherwise we're done. */ + return NULL; } break; @@ -704,53 +743,54 @@ ExecMergeJoin(MergeJoinState *node) innerTupleSlot = ExecProcNode(innerPlan); node->mj_InnerTupleSlot = innerTupleSlot; - if (TupIsNull(innerTupleSlot)) - { - MJ_printf("ExecMergeJoin: nothing in inner subplan\n"); - if (doFillOuter) - { - /* - * Need to emit left-join tuples for all outer tuples, - * including the one we just fetched. We set - * MatchedOuter = false to force the ENDINNER state to - * emit first tuple before advancing outer. - */ - node->mj_JoinState = EXEC_MJ_ENDINNER; - node->mj_MatchedOuter = false; - break; - } - /* Otherwise we're done. */ - return NULL; - } /* Compute join values and check for unmatchability */ - if (MJEvalInnerValues(node, innerTupleSlot)) - { - /* - * OK, we have the initial tuples. Begin by skipping - * non-matching tuples. - */ - node->mj_JoinState = EXEC_MJ_SKIP_TEST; - } - else + switch (MJEvalInnerValues(node, innerTupleSlot)) { - /* Mark before advancing, if wanted */ - if (node->mj_ExtraMarks) - ExecMarkPos(innerPlan); - /* Stay in same state to fetch next inner tuple */ - if (doFillInner) - { + case MJEVAL_MATCHABLE: /* - * Generate a fake join tuple with nulls for the outer - * tuple, and return it if it passes the non-join - * quals. + * OK, we have the initial tuples. Begin by skipping + * non-matching tuples. */ - TupleTableSlot *result; + node->mj_JoinState = EXEC_MJ_SKIP_TEST; + break; + case MJEVAL_NONMATCHABLE: + /* Mark before advancing, if wanted */ + if (node->mj_ExtraMarks) + ExecMarkPos(innerPlan); + /* Stay in same state to fetch next inner tuple */ + if (doFillInner) + { + /* + * Generate a fake join tuple with nulls for the + * outer tuple, and return it if it passes the + * non-join quals. + */ + TupleTableSlot *result; - result = MJFillInner(node); - if (result) - return result; - } + result = MJFillInner(node); + if (result) + return result; + } + break; + case MJEVAL_ENDOFJOIN: + /* No more inner tuples */ + MJ_printf("ExecMergeJoin: nothing in inner subplan\n"); + if (doFillOuter) + { + /* + * Need to emit left-join tuples for all outer + * tuples, including the one we just fetched. We + * set MatchedOuter = false to force the ENDINNER + * state to emit first tuple before advancing + * outer. + */ + node->mj_JoinState = EXEC_MJ_ENDINNER; + node->mj_MatchedOuter = false; + break; + } + /* Otherwise we're done. */ + return NULL; } break; @@ -878,41 +918,51 @@ ExecMergeJoin(MergeJoinState *node) MJ_DEBUG_PROC_NODE(innerTupleSlot); node->mj_MatchedInner = false; - if (TupIsNull(innerTupleSlot)) - { - node->mj_JoinState = EXEC_MJ_NEXTOUTER; - break; - } - - /* - * Load up the new inner tuple's comparison values. If we see - * that it contains a NULL and hence can't match any outer - * tuple, we can skip the comparison and assume the new tuple - * is greater than current outer. - */ - if (!MJEvalInnerValues(node, innerTupleSlot)) + /* Compute join values and check for unmatchability */ + switch (MJEvalInnerValues(node, innerTupleSlot)) { - node->mj_JoinState = EXEC_MJ_NEXTOUTER; - break; - } - - /* - * Test the new inner tuple to see if it matches outer. - * - * If they do match, then we join them and move on to the next - * inner tuple (EXEC_MJ_JOINTUPLES). - * - * If they do not match then advance to next outer tuple. - */ - compareResult = MJCompare(node); - MJ_DEBUG_COMPARE(compareResult); + case MJEVAL_MATCHABLE: + /* + * Test the new inner tuple to see if it matches + * outer. + * + * If they do match, then we join them and move on to + * the next inner tuple (EXEC_MJ_JOINTUPLES). + * + * If they do not match then advance to next outer + * tuple. + */ + compareResult = MJCompare(node); + MJ_DEBUG_COMPARE(compareResult); - if (compareResult == 0) - node->mj_JoinState = EXEC_MJ_JOINTUPLES; - else - { - Assert(compareResult < 0); - node->mj_JoinState = EXEC_MJ_NEXTOUTER; + if (compareResult == 0) + node->mj_JoinState = EXEC_MJ_JOINTUPLES; + else + { + Assert(compareResult < 0); + node->mj_JoinState = EXEC_MJ_NEXTOUTER; + } + break; + case MJEVAL_NONMATCHABLE: + /* + * It contains a NULL and hence can't match any outer + * tuple, so we can skip the comparison and assume the + * new tuple is greater than current outer. + */ + node->mj_JoinState = EXEC_MJ_NEXTOUTER; + break; + case MJEVAL_ENDOFJOIN: + /* + * No more inner tuples. However, this might be + * only effective and not physical end of inner plan, + * so force mj_InnerTupleSlot to null to make sure we + * don't fetch more inner tuples. (We need this hack + * because we are not transiting to a state where the + * inner plan is assumed to be exhausted.) + */ + node->mj_InnerTupleSlot = NULL; + node->mj_JoinState = EXEC_MJ_NEXTOUTER; + break; } break; @@ -962,37 +1012,32 @@ ExecMergeJoin(MergeJoinState *node) MJ_DEBUG_PROC_NODE(outerTupleSlot); node->mj_MatchedOuter = false; - /* - * if the outer tuple is null then we are done with the join, - * unless we have inner tuples we need to null-fill. - */ - if (TupIsNull(outerTupleSlot)) - { - MJ_printf("ExecMergeJoin: end of outer subplan\n"); - innerTupleSlot = node->mj_InnerTupleSlot; - if (doFillInner && !TupIsNull(innerTupleSlot)) - { - /* - * Need to emit right-join tuples for remaining inner - * tuples. - */ - node->mj_JoinState = EXEC_MJ_ENDOUTER; - break; - } - /* Otherwise we're done. */ - return NULL; - } - /* Compute join values and check for unmatchability */ - if (MJEvalOuterValues(node)) - { - /* Go test the new tuple against the marked tuple */ - node->mj_JoinState = EXEC_MJ_TESTOUTER; - } - else + switch (MJEvalOuterValues(node)) { - /* Can't match, so fetch next outer tuple */ - node->mj_JoinState = EXEC_MJ_NEXTOUTER; + case MJEVAL_MATCHABLE: + /* Go test the new tuple against the marked tuple */ + node->mj_JoinState = EXEC_MJ_TESTOUTER; + break; + case MJEVAL_NONMATCHABLE: + /* Can't match, so fetch next outer tuple */ + node->mj_JoinState = EXEC_MJ_NEXTOUTER; + break; + case MJEVAL_ENDOFJOIN: + /* No more outer tuples */ + MJ_printf("ExecMergeJoin: end of outer subplan\n"); + innerTupleSlot = node->mj_InnerTupleSlot; + if (doFillInner && !TupIsNull(innerTupleSlot)) + { + /* + * Need to emit right-join tuples for remaining + * inner tuples. + */ + node->mj_JoinState = EXEC_MJ_ENDOUTER; + break; + } + /* Otherwise we're done. */ + return NULL; } break; @@ -1093,39 +1138,39 @@ ExecMergeJoin(MergeJoinState *node) * larger than our marked inner tuples. So we need not * revisit any of the marked tuples but can proceed to * look for a match to the current inner. If there's - * no more inners, we are done. + * no more inners, no more matches are possible. * ---------------- */ Assert(compareResult > 0); innerTupleSlot = node->mj_InnerTupleSlot; - if (TupIsNull(innerTupleSlot)) + + /* reload comparison data for current inner */ + switch (MJEvalInnerValues(node, innerTupleSlot)) { - if (doFillOuter) - { + case MJEVAL_MATCHABLE: + /* proceed to compare it to the current outer */ + node->mj_JoinState = EXEC_MJ_SKIP_TEST; + break; + case MJEVAL_NONMATCHABLE: /* - * Need to emit left-join tuples for remaining - * outer tuples. + * current inner can't possibly match any outer; + * better to advance the inner scan than the outer. */ - node->mj_JoinState = EXEC_MJ_ENDINNER; + node->mj_JoinState = EXEC_MJ_SKIPINNER_ADVANCE; break; - } - /* Otherwise we're done. */ - return NULL; - } - - /* reload comparison data for current inner */ - if (MJEvalInnerValues(node, innerTupleSlot)) - { - /* proceed to compare it to the current outer */ - node->mj_JoinState = EXEC_MJ_SKIP_TEST; - } - else - { - /* - * current inner can't possibly match any outer; - * better to advance the inner scan than the outer. - */ - node->mj_JoinState = EXEC_MJ_SKIPINNER_ADVANCE; + case MJEVAL_ENDOFJOIN: + /* No more inner tuples */ + if (doFillOuter) + { + /* + * Need to emit left-join tuples for remaining + * outer tuples. + */ + node->mj_JoinState = EXEC_MJ_ENDINNER; + break; + } + /* Otherwise we're done. */ + return NULL; } } break; @@ -1218,37 +1263,32 @@ ExecMergeJoin(MergeJoinState *node) MJ_DEBUG_PROC_NODE(outerTupleSlot); node->mj_MatchedOuter = false; - /* - * if the outer tuple is null then we are done with the join, - * unless we have inner tuples we need to null-fill. - */ - if (TupIsNull(outerTupleSlot)) - { - MJ_printf("ExecMergeJoin: end of outer subplan\n"); - innerTupleSlot = node->mj_InnerTupleSlot; - if (doFillInner && !TupIsNull(innerTupleSlot)) - { - /* - * Need to emit right-join tuples for remaining inner - * tuples. - */ - node->mj_JoinState = EXEC_MJ_ENDOUTER; - break; - } - /* Otherwise we're done. */ - return NULL; - } - /* Compute join values and check for unmatchability */ - if (MJEvalOuterValues(node)) - { - /* Go test the new tuple against the current inner */ - node->mj_JoinState = EXEC_MJ_SKIP_TEST; - } - else + switch (MJEvalOuterValues(node)) { - /* Can't match, so fetch next outer tuple */ - node->mj_JoinState = EXEC_MJ_SKIPOUTER_ADVANCE; + case MJEVAL_MATCHABLE: + /* Go test the new tuple against the current inner */ + node->mj_JoinState = EXEC_MJ_SKIP_TEST; + break; + case MJEVAL_NONMATCHABLE: + /* Can't match, so fetch next outer tuple */ + node->mj_JoinState = EXEC_MJ_SKIPOUTER_ADVANCE; + break; + case MJEVAL_ENDOFJOIN: + /* No more outer tuples */ + MJ_printf("ExecMergeJoin: end of outer subplan\n"); + innerTupleSlot = node->mj_InnerTupleSlot; + if (doFillInner && !TupIsNull(innerTupleSlot)) + { + /* + * Need to emit right-join tuples for remaining + * inner tuples. + */ + node->mj_JoinState = EXEC_MJ_ENDOUTER; + break; + } + /* Otherwise we're done. */ + return NULL; } break; @@ -1289,40 +1329,35 @@ ExecMergeJoin(MergeJoinState *node) MJ_DEBUG_PROC_NODE(innerTupleSlot); node->mj_MatchedInner = false; - /* - * if the inner tuple is null then we are done with the join, - * unless we have outer tuples we need to null-fill. - */ - if (TupIsNull(innerTupleSlot)) + /* Compute join values and check for unmatchability */ + switch (MJEvalInnerValues(node, innerTupleSlot)) { - MJ_printf("ExecMergeJoin: end of inner subplan\n"); - outerTupleSlot = node->mj_OuterTupleSlot; - if (doFillOuter && !TupIsNull(outerTupleSlot)) - { + case MJEVAL_MATCHABLE: + /* proceed to compare it to the current outer */ + node->mj_JoinState = EXEC_MJ_SKIP_TEST; + break; + case MJEVAL_NONMATCHABLE: /* - * Need to emit left-join tuples for remaining outer - * tuples. + * current inner can't possibly match any outer; + * better to advance the inner scan than the outer. */ - node->mj_JoinState = EXEC_MJ_ENDINNER; + node->mj_JoinState = EXEC_MJ_SKIPINNER_ADVANCE; break; - } - /* Otherwise we're done. */ - return NULL; - } - - /* Compute join values and check for unmatchability */ - if (MJEvalInnerValues(node, innerTupleSlot)) - { - /* proceed to compare it to the current outer */ - node->mj_JoinState = EXEC_MJ_SKIP_TEST; - } - else - { - /* - * current inner can't possibly match any outer; better to - * advance the inner scan than the outer. - */ - node->mj_JoinState = EXEC_MJ_SKIPINNER_ADVANCE; + case MJEVAL_ENDOFJOIN: + /* No more inner tuples */ + MJ_printf("ExecMergeJoin: end of inner subplan\n"); + outerTupleSlot = node->mj_OuterTupleSlot; + if (doFillOuter && !TupIsNull(outerTupleSlot)) + { + /* + * Need to emit left-join tuples for remaining + * outer tuples. + */ + node->mj_JoinState = EXEC_MJ_ENDINNER; + break; + } + /* Otherwise we're done. */ + return NULL; } break; -- 2.11.0