1 /*-------------------------------------------------------------------------
4 * Routines copied from PostgreSQL core distribution.
6 * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
9 *-------------------------------------------------------------------------
12 static bool is_dummy_rel(RelOptInfo *rel);
13 static void mark_dummy_rel(RelOptInfo *rel);
14 static bool restriction_is_constant_false(List *restrictlist,
15 bool only_pushed_down);
19 * Determine whether a proposed join is legal given the query's
20 * join order constraints; and if it is, determine the join type.
22 * Caller must supply not only the two rels, but the union of their relids.
23 * (We could simplify the API by computing joinrelids locally, but this
24 * would be redundant work in the normal path through make_join_rel.)
26 * On success, *sjinfo_p is set to NULL if this is to be a plain inner join,
27 * else it's set to point to the associated SpecialJoinInfo node. Also,
28 * *reversed_p is set TRUE if the given relations need to be swapped to
29 * match the SpecialJoinInfo node.
32 join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
34 SpecialJoinInfo **sjinfo_p, bool *reversed_p)
36 SpecialJoinInfo *match_sjinfo;
43 * Ensure output params are set on failure return. This is just to
44 * suppress uninitialized-variable warnings from overly anal compilers.
50 * If we have any special joins, the proposed join might be illegal; and
51 * in any case we have to determine its join type. Scan the join info
57 is_valid_inner = true;
59 foreach(l, root->join_info_list)
61 SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);
64 * This special join is not relevant unless its RHS overlaps the
65 * proposed join. (Check this first as a fast path for dismissing
66 * most irrelevant SJs quickly.)
68 if (!bms_overlap(sjinfo->min_righthand, joinrelids))
72 * Also, not relevant if proposed join is fully contained within RHS
73 * (ie, we're still building up the RHS).
75 if (bms_is_subset(joinrelids, sjinfo->min_righthand))
79 * Also, not relevant if SJ is already done within either input.
81 if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
82 bms_is_subset(sjinfo->min_righthand, rel1->relids))
84 if (bms_is_subset(sjinfo->min_lefthand, rel2->relids) &&
85 bms_is_subset(sjinfo->min_righthand, rel2->relids))
89 * If it's a semijoin and we already joined the RHS to any other rels
90 * within either input, then we must have unique-ified the RHS at that
91 * point (see below). Therefore the semijoin is no longer relevant in
94 if (sjinfo->jointype == JOIN_SEMI)
96 if (bms_is_subset(sjinfo->syn_righthand, rel1->relids) &&
97 !bms_equal(sjinfo->syn_righthand, rel1->relids))
99 if (bms_is_subset(sjinfo->syn_righthand, rel2->relids) &&
100 !bms_equal(sjinfo->syn_righthand, rel2->relids))
105 * If one input contains min_lefthand and the other contains
106 * min_righthand, then we can perform the SJ at this join.
108 * Barf if we get matches to more than one SJ (is that possible?)
110 if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
111 bms_is_subset(sjinfo->min_righthand, rel2->relids))
114 return false; /* invalid join path */
115 match_sjinfo = sjinfo;
118 else if (bms_is_subset(sjinfo->min_lefthand, rel2->relids) &&
119 bms_is_subset(sjinfo->min_righthand, rel1->relids))
122 return false; /* invalid join path */
123 match_sjinfo = sjinfo;
126 else if (sjinfo->jointype == JOIN_SEMI &&
127 bms_equal(sjinfo->syn_righthand, rel2->relids) &&
128 create_unique_path(root, rel2, rel2->cheapest_total_path,
132 * For a semijoin, we can join the RHS to anything else by
133 * unique-ifying the RHS (if the RHS can be unique-ified).
134 * We will only get here if we have the full RHS but less
135 * than min_lefthand on the LHS.
137 * The reason to consider such a join path is exemplified by
138 * SELECT ... FROM a,b WHERE (a.x,b.y) IN (SELECT c1,c2 FROM c)
139 * If we insist on doing this as a semijoin we will first have
140 * to form the cartesian product of A*B. But if we unique-ify
141 * C then the semijoin becomes a plain innerjoin and we can join
142 * in any order, eg C to A and then to B. When C is much smaller
143 * than A and B this can be a huge win. So we allow C to be
144 * joined to just A or just B here, and then make_join_rel has
145 * to handle the case properly.
147 * Note that actually we'll allow unique-ified C to be joined to
148 * some other relation D here, too. That is legal, if usually not
149 * very sane, and this routine is only concerned with legality not
150 * with whether the join is good strategy.
154 return false; /* invalid join path */
155 match_sjinfo = sjinfo;
159 else if (sjinfo->jointype == JOIN_SEMI &&
160 bms_equal(sjinfo->syn_righthand, rel1->relids) &&
161 create_unique_path(root, rel1, rel1->cheapest_total_path,
164 /* Reversed semijoin case */
166 return false; /* invalid join path */
167 match_sjinfo = sjinfo;
174 * Otherwise, the proposed join overlaps the RHS but isn't
175 * a valid implementation of this SJ. It might still be
176 * a legal join, however. If both inputs overlap the RHS,
177 * assume that it's OK. Since the inputs presumably got past
178 * this function's checks previously, they can't overlap the
179 * LHS and their violations of the RHS boundary must represent
180 * SJs that have been determined to commute with this one.
181 * We have to allow this to work correctly in cases like
182 * (a LEFT JOIN (b JOIN (c LEFT JOIN d)))
183 * when the c/d join has been determined to commute with the join
184 * to a, and hence d is not part of min_righthand for the upper
185 * join. It should be legal to join b to c/d but this will appear
186 * as a violation of the upper join's RHS.
187 * Furthermore, if one input overlaps the RHS and the other does
188 * not, we should still allow the join if it is a valid
189 * implementation of some other SJ. We have to allow this to
190 * support the associative identity
191 * (a LJ b on Pab) LJ c ON Pbc = a LJ (b LJ c ON Pbc) on Pab
192 * since joining B directly to C violates the lower SJ's RHS.
193 * We assume that make_outerjoininfo() set things up correctly
194 * so that we'll only match to some SJ if the join is valid.
195 * Set flag here to check at bottom of loop.
198 if (sjinfo->jointype != JOIN_SEMI &&
199 bms_overlap(rel1->relids, sjinfo->min_righthand) &&
200 bms_overlap(rel2->relids, sjinfo->min_righthand))
203 Assert(!bms_overlap(joinrelids, sjinfo->min_lefthand));
206 is_valid_inner = false;
211 * Fail if violated some SJ's RHS and didn't match to another SJ. However,
212 * "matching" to a semijoin we are implementing by unique-ification
213 * doesn't count (think: it's really an inner join).
215 if (!is_valid_inner &&
216 (match_sjinfo == NULL || unique_ified))
217 return false; /* invalid join path */
219 /* Otherwise, it's a valid join */
220 *sjinfo_p = match_sjinfo;
221 *reversed_p = reversed;
228 * Find or create a join RelOptInfo that represents the join of
229 * the two given rels, and add to it path information for paths
230 * created with the two rels as outer and inner rel.
231 * (The join rel may already contain paths generated from other
232 * pairs of rels that add up to the same set of base rels.)
234 * NB: will return NULL if attempted join is not valid. This can happen
235 * when working with outer joins, or with IN or EXISTS clauses that have been
239 make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
242 SpecialJoinInfo *sjinfo;
244 SpecialJoinInfo sjinfo_data;
248 /* We should never try to join two overlapping sets of rels. */
249 Assert(!bms_overlap(rel1->relids, rel2->relids));
251 /* Construct Relids set that identifies the joinrel. */
252 joinrelids = bms_union(rel1->relids, rel2->relids);
254 /* Check validity and determine join type. */
255 if (!join_is_legal(root, rel1, rel2, joinrelids,
258 /* invalid join path */
259 bms_free(joinrelids);
263 /* Swap rels if needed to match the join info. */
266 RelOptInfo *trel = rel1;
273 * If it's a plain inner join, then we won't have found anything in
274 * join_info_list. Make up a SpecialJoinInfo so that selectivity
275 * estimation functions will know what's being joined.
279 sjinfo = &sjinfo_data;
280 sjinfo->type = T_SpecialJoinInfo;
281 sjinfo->min_lefthand = rel1->relids;
282 sjinfo->min_righthand = rel2->relids;
283 sjinfo->syn_lefthand = rel1->relids;
284 sjinfo->syn_righthand = rel2->relids;
285 sjinfo->jointype = JOIN_INNER;
286 /* we don't bother trying to make the remaining fields valid */
287 sjinfo->lhs_strict = false;
288 sjinfo->delay_upper_joins = false;
289 sjinfo->join_quals = NIL;
293 * Find or build the join RelOptInfo, and compute the restrictlist that
294 * goes with this particular joining.
296 joinrel = build_join_rel(root, joinrelids, rel1, rel2, sjinfo,
300 * If we've already proven this join is empty, we needn't consider any
303 if (is_dummy_rel(joinrel))
305 bms_free(joinrelids);
310 * Consider paths using each rel as both outer and inner. Depending on
311 * the join type, a provably empty outer or inner rel might mean the join
312 * is provably empty too; in which case throw away any previously computed
313 * paths and mark the join as dummy. (We do it this way since it's
314 * conceivable that dummy-ness of a multi-element join might only be
315 * noticeable for certain construction paths.)
317 * Also, a provably constant-false join restriction typically means that
318 * we can skip evaluating one or both sides of the join. We do this by
319 * marking the appropriate rel as dummy. For outer joins, a
320 * constant-false restriction that is pushed down still means the whole
321 * join is dummy, while a non-pushed-down one means that no inner rows
322 * will join so we can treat the inner rel as dummy.
324 * We need only consider the jointypes that appear in join_info_list, plus
327 switch (sjinfo->jointype)
330 if (is_dummy_rel(rel1) || is_dummy_rel(rel2) ||
331 restriction_is_constant_false(restrictlist, false))
333 mark_dummy_rel(joinrel);
336 add_paths_to_joinrel(root, joinrel, rel1, rel2,
339 add_paths_to_joinrel(root, joinrel, rel2, rel1,
344 if (is_dummy_rel(rel1) ||
345 restriction_is_constant_false(restrictlist, true))
347 mark_dummy_rel(joinrel);
350 if (restriction_is_constant_false(restrictlist, false) &&
351 bms_is_subset(rel2->relids, sjinfo->syn_righthand))
352 mark_dummy_rel(rel2);
353 add_paths_to_joinrel(root, joinrel, rel1, rel2,
356 add_paths_to_joinrel(root, joinrel, rel2, rel1,
361 if ((is_dummy_rel(rel1) && is_dummy_rel(rel2)) ||
362 restriction_is_constant_false(restrictlist, true))
364 mark_dummy_rel(joinrel);
367 add_paths_to_joinrel(root, joinrel, rel1, rel2,
370 add_paths_to_joinrel(root, joinrel, rel2, rel1,
375 * If there are join quals that aren't mergeable or hashable, we
376 * may not be able to build any valid plan. Complain here so that
377 * we can give a somewhat-useful error message. (Since we have no
378 * flexibility of planning for a full join, there's no chance of
379 * succeeding later with another pair of input rels.)
381 if (joinrel->pathlist == NIL)
383 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
384 errmsg("FULL JOIN is only supported with merge-joinable or hash-joinable join conditions")));
389 * We might have a normal semijoin, or a case where we don't have
390 * enough rels to do the semijoin but can unique-ify the RHS and
391 * then do an innerjoin (see comments in join_is_legal). In the
392 * latter case we can't apply JOIN_SEMI joining.
394 if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
395 bms_is_subset(sjinfo->min_righthand, rel2->relids))
397 if (is_dummy_rel(rel1) || is_dummy_rel(rel2) ||
398 restriction_is_constant_false(restrictlist, false))
400 mark_dummy_rel(joinrel);
403 add_paths_to_joinrel(root, joinrel, rel1, rel2,
409 * If we know how to unique-ify the RHS and one input rel is
410 * exactly the RHS (not a superset) we can consider unique-ifying
411 * it and then doing a regular join. (The create_unique_path
412 * check here is probably redundant with what join_is_legal did,
413 * but if so the check is cheap because it's cached. So test
414 * anyway to be sure.)
416 if (bms_equal(sjinfo->syn_righthand, rel2->relids) &&
417 create_unique_path(root, rel2, rel2->cheapest_total_path,
420 if (is_dummy_rel(rel1) || is_dummy_rel(rel2) ||
421 restriction_is_constant_false(restrictlist, false))
423 mark_dummy_rel(joinrel);
426 add_paths_to_joinrel(root, joinrel, rel1, rel2,
427 JOIN_UNIQUE_INNER, sjinfo,
429 add_paths_to_joinrel(root, joinrel, rel2, rel1,
430 JOIN_UNIQUE_OUTER, sjinfo,
435 if (is_dummy_rel(rel1) ||
436 restriction_is_constant_false(restrictlist, true))
438 mark_dummy_rel(joinrel);
441 if (restriction_is_constant_false(restrictlist, false) &&
442 bms_is_subset(rel2->relids, sjinfo->syn_righthand))
443 mark_dummy_rel(rel2);
444 add_paths_to_joinrel(root, joinrel, rel1, rel2,
449 /* other values not expected here */
450 elog(ERROR, "unrecognized join type: %d", (int) sjinfo->jointype);
454 bms_free(joinrelids);
460 * is_dummy_rel --- has relation been proven empty?
462 * If so, it will have a single path that is dummy.
465 is_dummy_rel(RelOptInfo *rel)
467 return (rel->cheapest_total_path != NULL &&
468 IS_DUMMY_PATH(rel->cheapest_total_path));
473 * Mark a relation as proven empty.
475 * During GEQO planning, this can get invoked more than once on the same
476 * baserel struct, so it's worth checking to see if the rel is already marked
479 * Also, when called during GEQO join planning, we are in a short-lived
480 * memory context. We must make sure that the dummy path attached to a
481 * baserel survives the GEQO cycle, else the baserel is trashed for future
482 * GEQO cycles. On the other hand, when we are marking a joinrel during GEQO,
483 * we don't want the dummy path to clutter the main planning context. Upshot
484 * is that the best solution is to explicitly make the dummy path in the same
485 * context the given RelOptInfo is in.
488 mark_dummy_rel(RelOptInfo *rel)
490 MemoryContext oldcontext;
492 /* Already marked? */
493 if (is_dummy_rel(rel))
496 /* No, so choose correct context to make the dummy path in */
497 oldcontext = MemoryContextSwitchTo(GetMemoryChunkContext(rel));
499 /* Set dummy size estimate */
502 /* Evict any previously chosen paths */
505 /* Set up the dummy path */
506 add_path(rel, (Path *) create_append_path(rel, NIL));
508 /* Set or update cheapest_total_path */
511 MemoryContextSwitchTo(oldcontext);
516 * restriction_is_constant_false --- is a restrictlist just FALSE?
518 * In cases where a qual is provably constant FALSE, eval_const_expressions
519 * will generally have thrown away anything that's ANDed with it. In outer
520 * join situations this will leave us computing cartesian products only to
521 * decide there's no match for an outer row, which is pretty stupid. So,
522 * we need to detect the case.
524 * If only_pushed_down is TRUE, then consider only pushed-down quals.
527 restriction_is_constant_false(List *restrictlist, bool only_pushed_down)
532 * Despite the above comment, the restriction list we see here might
533 * possibly have other members besides the FALSE constant, since other
534 * quals could get "pushed down" to the outer join level. So we check
535 * each member of the list.
537 foreach(lc, restrictlist)
539 RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
541 Assert(IsA(rinfo, RestrictInfo));
542 if (only_pushed_down && !rinfo->is_pushed_down)
545 if (rinfo->clause && IsA(rinfo->clause, Const))
547 Const *con = (Const *) rinfo->clause;
549 /* constant NULL is as good as constant FALSE for our purposes */
550 if (con->constisnull)
552 if (!DatumGetBool(con->constvalue))