OSDN Git Service

88c9abf9e5d4edc827683dc21a7441de79a890d4
[pghintplan/pg_hint_plan.git] / make_join_rel.c
1 /*-------------------------------------------------------------------------
2  *
3  * make_join_rel.c
4  *        Routines copied from PostgreSQL core distribution.
5  *
6  * Portions Copyright (c) 1996-2011, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *-------------------------------------------------------------------------
10  */
11
12 static bool is_dummy_rel(RelOptInfo *rel);
13 static void mark_dummy_rel(RelOptInfo *rel);
14 static bool restriction_is_constant_false(List *restrictlist,
15                                                           bool only_pushed_down);
16
17 /*
18  * join_is_legal
19  *         Determine whether a proposed join is legal given the query's
20  *         join order constraints; and if it is, determine the join type.
21  *
22  * Caller must supply not only the two rels, but the union of their relids.
23  * (We could simplify the API by computing joinrelids locally, but this
24  * would be redundant work in the normal path through make_join_rel.)
25  *
26  * On success, *sjinfo_p is set to NULL if this is to be a plain inner join,
27  * else it's set to point to the associated SpecialJoinInfo node.  Also,
28  * *reversed_p is set TRUE if the given relations need to be swapped to
29  * match the SpecialJoinInfo node.
30  */
31 static bool
32 join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
33                           Relids joinrelids,
34                           SpecialJoinInfo **sjinfo_p, bool *reversed_p)
35 {
36         SpecialJoinInfo *match_sjinfo;
37         bool            reversed;
38         bool            unique_ified;
39         bool            is_valid_inner;
40         ListCell   *l;
41
42         /*
43          * Ensure output params are set on failure return.      This is just to
44          * suppress uninitialized-variable warnings from overly anal compilers.
45          */
46         *sjinfo_p = NULL;
47         *reversed_p = false;
48
49         /*
50          * If we have any special joins, the proposed join might be illegal; and
51          * in any case we have to determine its join type.      Scan the join info
52          * list for conflicts.
53          */
54         match_sjinfo = NULL;
55         reversed = false;
56         unique_ified = false;
57         is_valid_inner = true;
58
59         foreach(l, root->join_info_list)
60         {
61                 SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);
62
63                 /*
64                  * This special join is not relevant unless its RHS overlaps the
65                  * proposed join.  (Check this first as a fast path for dismissing
66                  * most irrelevant SJs quickly.)
67                  */
68                 if (!bms_overlap(sjinfo->min_righthand, joinrelids))
69                         continue;
70
71                 /*
72                  * Also, not relevant if proposed join is fully contained within RHS
73                  * (ie, we're still building up the RHS).
74                  */
75                 if (bms_is_subset(joinrelids, sjinfo->min_righthand))
76                         continue;
77
78                 /*
79                  * Also, not relevant if SJ is already done within either input.
80                  */
81                 if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
82                         bms_is_subset(sjinfo->min_righthand, rel1->relids))
83                         continue;
84                 if (bms_is_subset(sjinfo->min_lefthand, rel2->relids) &&
85                         bms_is_subset(sjinfo->min_righthand, rel2->relids))
86                         continue;
87
88                 /*
89                  * If it's a semijoin and we already joined the RHS to any other rels
90                  * within either input, then we must have unique-ified the RHS at that
91                  * point (see below).  Therefore the semijoin is no longer relevant in
92                  * this join path.
93                  */
94                 if (sjinfo->jointype == JOIN_SEMI)
95                 {
96                         if (bms_is_subset(sjinfo->syn_righthand, rel1->relids) &&
97                                 !bms_equal(sjinfo->syn_righthand, rel1->relids))
98                                 continue;
99                         if (bms_is_subset(sjinfo->syn_righthand, rel2->relids) &&
100                                 !bms_equal(sjinfo->syn_righthand, rel2->relids))
101                                 continue;
102                 }
103
104                 /*
105                  * If one input contains min_lefthand and the other contains
106                  * min_righthand, then we can perform the SJ at this join.
107                  *
108                  * Barf if we get matches to more than one SJ (is that possible?)
109                  */
110                 if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
111                         bms_is_subset(sjinfo->min_righthand, rel2->relids))
112                 {
113                         if (match_sjinfo)
114                                 return false;   /* invalid join path */
115                         match_sjinfo = sjinfo;
116                         reversed = false;
117                 }
118                 else if (bms_is_subset(sjinfo->min_lefthand, rel2->relids) &&
119                                  bms_is_subset(sjinfo->min_righthand, rel1->relids))
120                 {
121                         if (match_sjinfo)
122                                 return false;   /* invalid join path */
123                         match_sjinfo = sjinfo;
124                         reversed = true;
125                 }
126                 else if (sjinfo->jointype == JOIN_SEMI &&
127                                  bms_equal(sjinfo->syn_righthand, rel2->relids) &&
128                                  create_unique_path(root, rel2, rel2->cheapest_total_path,
129                                                                         sjinfo) != NULL)
130                 {
131                         /*----------
132                          * For a semijoin, we can join the RHS to anything else by
133                          * unique-ifying the RHS (if the RHS can be unique-ified).
134                          * We will only get here if we have the full RHS but less
135                          * than min_lefthand on the LHS.
136                          *
137                          * The reason to consider such a join path is exemplified by
138                          *      SELECT ... FROM a,b WHERE (a.x,b.y) IN (SELECT c1,c2 FROM c)
139                          * If we insist on doing this as a semijoin we will first have
140                          * to form the cartesian product of A*B.  But if we unique-ify
141                          * C then the semijoin becomes a plain innerjoin and we can join
142                          * in any order, eg C to A and then to B.  When C is much smaller
143                          * than A and B this can be a huge win.  So we allow C to be
144                          * joined to just A or just B here, and then make_join_rel has
145                          * to handle the case properly.
146                          *
147                          * Note that actually we'll allow unique-ified C to be joined to
148                          * some other relation D here, too.  That is legal, if usually not
149                          * very sane, and this routine is only concerned with legality not
150                          * with whether the join is good strategy.
151                          *----------
152                          */
153                         if (match_sjinfo)
154                                 return false;   /* invalid join path */
155                         match_sjinfo = sjinfo;
156                         reversed = false;
157                         unique_ified = true;
158                 }
159                 else if (sjinfo->jointype == JOIN_SEMI &&
160                                  bms_equal(sjinfo->syn_righthand, rel1->relids) &&
161                                  create_unique_path(root, rel1, rel1->cheapest_total_path,
162                                                                         sjinfo) != NULL)
163                 {
164                         /* Reversed semijoin case */
165                         if (match_sjinfo)
166                                 return false;   /* invalid join path */
167                         match_sjinfo = sjinfo;
168                         reversed = true;
169                         unique_ified = true;
170                 }
171                 else
172                 {
173                         /*----------
174                          * Otherwise, the proposed join overlaps the RHS but isn't
175                          * a valid implementation of this SJ.  It might still be
176                          * a legal join, however.  If both inputs overlap the RHS,
177                          * assume that it's OK.  Since the inputs presumably got past
178                          * this function's checks previously, they can't overlap the
179                          * LHS and their violations of the RHS boundary must represent
180                          * SJs that have been determined to commute with this one.
181                          * We have to allow this to work correctly in cases like
182                          *              (a LEFT JOIN (b JOIN (c LEFT JOIN d)))
183                          * when the c/d join has been determined to commute with the join
184                          * to a, and hence d is not part of min_righthand for the upper
185                          * join.  It should be legal to join b to c/d but this will appear
186                          * as a violation of the upper join's RHS.
187                          * Furthermore, if one input overlaps the RHS and the other does
188                          * not, we should still allow the join if it is a valid
189                          * implementation of some other SJ.  We have to allow this to
190                          * support the associative identity
191                          *              (a LJ b on Pab) LJ c ON Pbc = a LJ (b LJ c ON Pbc) on Pab
192                          * since joining B directly to C violates the lower SJ's RHS.
193                          * We assume that make_outerjoininfo() set things up correctly
194                          * so that we'll only match to some SJ if the join is valid.
195                          * Set flag here to check at bottom of loop.
196                          *----------
197                          */
198                         if (sjinfo->jointype != JOIN_SEMI &&
199                                 bms_overlap(rel1->relids, sjinfo->min_righthand) &&
200                                 bms_overlap(rel2->relids, sjinfo->min_righthand))
201                         {
202                                 /* seems OK */
203                                 Assert(!bms_overlap(joinrelids, sjinfo->min_lefthand));
204                         }
205                         else
206                                 is_valid_inner = false;
207                 }
208         }
209
210         /*
211          * Fail if violated some SJ's RHS and didn't match to another SJ. However,
212          * "matching" to a semijoin we are implementing by unique-ification
213          * doesn't count (think: it's really an inner join).
214          */
215         if (!is_valid_inner &&
216                 (match_sjinfo == NULL || unique_ified))
217                 return false;                   /* invalid join path */
218
219         /* Otherwise, it's a valid join */
220         *sjinfo_p = match_sjinfo;
221         *reversed_p = reversed;
222         return true;
223 }
224
225
226 /*
227  * make_join_rel
228  *         Find or create a join RelOptInfo that represents the join of
229  *         the two given rels, and add to it path information for paths
230  *         created with the two rels as outer and inner rel.
231  *         (The join rel may already contain paths generated from other
232  *         pairs of rels that add up to the same set of base rels.)
233  *
234  * NB: will return NULL if attempted join is not valid.  This can happen
235  * when working with outer joins, or with IN or EXISTS clauses that have been
236  * turned into joins.
237  */
238 RelOptInfo *
239 make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
240 {
241         Relids          joinrelids;
242         SpecialJoinInfo *sjinfo;
243         bool            reversed;
244         SpecialJoinInfo sjinfo_data;
245         RelOptInfo *joinrel;
246         List       *restrictlist;
247
248         /* We should never try to join two overlapping sets of rels. */
249         Assert(!bms_overlap(rel1->relids, rel2->relids));
250
251         /* Construct Relids set that identifies the joinrel. */
252         joinrelids = bms_union(rel1->relids, rel2->relids);
253
254         /* Check validity and determine join type. */
255         if (!join_is_legal(root, rel1, rel2, joinrelids,
256                                            &sjinfo, &reversed))
257         {
258                 /* invalid join path */
259                 bms_free(joinrelids);
260                 return NULL;
261         }
262
263         /* Swap rels if needed to match the join info. */
264         if (reversed)
265         {
266                 RelOptInfo *trel = rel1;
267
268                 rel1 = rel2;
269                 rel2 = trel;
270         }
271
272         /*
273          * If it's a plain inner join, then we won't have found anything in
274          * join_info_list.      Make up a SpecialJoinInfo so that selectivity
275          * estimation functions will know what's being joined.
276          */
277         if (sjinfo == NULL)
278         {
279                 sjinfo = &sjinfo_data;
280                 sjinfo->type = T_SpecialJoinInfo;
281                 sjinfo->min_lefthand = rel1->relids;
282                 sjinfo->min_righthand = rel2->relids;
283                 sjinfo->syn_lefthand = rel1->relids;
284                 sjinfo->syn_righthand = rel2->relids;
285                 sjinfo->jointype = JOIN_INNER;
286                 /* we don't bother trying to make the remaining fields valid */
287                 sjinfo->lhs_strict = false;
288                 sjinfo->delay_upper_joins = false;
289                 sjinfo->join_quals = NIL;
290         }
291
292         /*
293          * Find or build the join RelOptInfo, and compute the restrictlist that
294          * goes with this particular joining.
295          */
296         joinrel = build_join_rel(root, joinrelids, rel1, rel2, sjinfo,
297                                                          &restrictlist);
298
299         /*
300          * If we've already proven this join is empty, we needn't consider any
301          * more paths for it.
302          */
303         if (is_dummy_rel(joinrel))
304         {
305                 bms_free(joinrelids);
306                 return joinrel;
307         }
308
309         /*
310          * Consider paths using each rel as both outer and inner.  Depending on
311          * the join type, a provably empty outer or inner rel might mean the join
312          * is provably empty too; in which case throw away any previously computed
313          * paths and mark the join as dummy.  (We do it this way since it's
314          * conceivable that dummy-ness of a multi-element join might only be
315          * noticeable for certain construction paths.)
316          *
317          * Also, a provably constant-false join restriction typically means that
318          * we can skip evaluating one or both sides of the join.  We do this by
319          * marking the appropriate rel as dummy.  For outer joins, a
320          * constant-false restriction that is pushed down still means the whole
321          * join is dummy, while a non-pushed-down one means that no inner rows
322          * will join so we can treat the inner rel as dummy.
323          *
324          * We need only consider the jointypes that appear in join_info_list, plus
325          * JOIN_INNER.
326          */
327         switch (sjinfo->jointype)
328         {
329                 case JOIN_INNER:
330                         if (is_dummy_rel(rel1) || is_dummy_rel(rel2) ||
331                                 restriction_is_constant_false(restrictlist, false))
332                         {
333                                 mark_dummy_rel(joinrel);
334                                 break;
335                         }
336                         add_paths_to_joinrel(root, joinrel, rel1, rel2,
337                                                                  JOIN_INNER, sjinfo,
338                                                                  restrictlist);
339                         add_paths_to_joinrel(root, joinrel, rel2, rel1,
340                                                                  JOIN_INNER, sjinfo,
341                                                                  restrictlist);
342                         break;
343                 case JOIN_LEFT:
344                         if (is_dummy_rel(rel1) ||
345                                 restriction_is_constant_false(restrictlist, true))
346                         {
347                                 mark_dummy_rel(joinrel);
348                                 break;
349                         }
350                         if (restriction_is_constant_false(restrictlist, false) &&
351                                 bms_is_subset(rel2->relids, sjinfo->syn_righthand))
352                                 mark_dummy_rel(rel2);
353                         add_paths_to_joinrel(root, joinrel, rel1, rel2,
354                                                                  JOIN_LEFT, sjinfo,
355                                                                  restrictlist);
356                         add_paths_to_joinrel(root, joinrel, rel2, rel1,
357                                                                  JOIN_RIGHT, sjinfo,
358                                                                  restrictlist);
359                         break;
360                 case JOIN_FULL:
361                         if ((is_dummy_rel(rel1) && is_dummy_rel(rel2)) ||
362                                 restriction_is_constant_false(restrictlist, true))
363                         {
364                                 mark_dummy_rel(joinrel);
365                                 break;
366                         }
367                         add_paths_to_joinrel(root, joinrel, rel1, rel2,
368                                                                  JOIN_FULL, sjinfo,
369                                                                  restrictlist);
370                         add_paths_to_joinrel(root, joinrel, rel2, rel1,
371                                                                  JOIN_FULL, sjinfo,
372                                                                  restrictlist);
373
374                         /*
375                          * If there are join quals that aren't mergeable or hashable, we
376                          * may not be able to build any valid plan.  Complain here so that
377                          * we can give a somewhat-useful error message.  (Since we have no
378                          * flexibility of planning for a full join, there's no chance of
379                          * succeeding later with another pair of input rels.)
380                          */
381                         if (joinrel->pathlist == NIL)
382                                 ereport(ERROR,
383                                                 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
384                                                  errmsg("FULL JOIN is only supported with merge-joinable or hash-joinable join conditions")));
385                         break;
386                 case JOIN_SEMI:
387
388                         /*
389                          * We might have a normal semijoin, or a case where we don't have
390                          * enough rels to do the semijoin but can unique-ify the RHS and
391                          * then do an innerjoin (see comments in join_is_legal).  In the
392                          * latter case we can't apply JOIN_SEMI joining.
393                          */
394                         if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
395                                 bms_is_subset(sjinfo->min_righthand, rel2->relids))
396                         {
397                                 if (is_dummy_rel(rel1) || is_dummy_rel(rel2) ||
398                                         restriction_is_constant_false(restrictlist, false))
399                                 {
400                                         mark_dummy_rel(joinrel);
401                                         break;
402                                 }
403                                 add_paths_to_joinrel(root, joinrel, rel1, rel2,
404                                                                          JOIN_SEMI, sjinfo,
405                                                                          restrictlist);
406                         }
407
408                         /*
409                          * If we know how to unique-ify the RHS and one input rel is
410                          * exactly the RHS (not a superset) we can consider unique-ifying
411                          * it and then doing a regular join.  (The create_unique_path
412                          * check here is probably redundant with what join_is_legal did,
413                          * but if so the check is cheap because it's cached.  So test
414                          * anyway to be sure.)
415                          */
416                         if (bms_equal(sjinfo->syn_righthand, rel2->relids) &&
417                                 create_unique_path(root, rel2, rel2->cheapest_total_path,
418                                                                    sjinfo) != NULL)
419                         {
420                                 if (is_dummy_rel(rel1) || is_dummy_rel(rel2) ||
421                                         restriction_is_constant_false(restrictlist, false))
422                                 {
423                                         mark_dummy_rel(joinrel);
424                                         break;
425                                 }
426                                 add_paths_to_joinrel(root, joinrel, rel1, rel2,
427                                                                          JOIN_UNIQUE_INNER, sjinfo,
428                                                                          restrictlist);
429                                 add_paths_to_joinrel(root, joinrel, rel2, rel1,
430                                                                          JOIN_UNIQUE_OUTER, sjinfo,
431                                                                          restrictlist);
432                         }
433                         break;
434                 case JOIN_ANTI:
435                         if (is_dummy_rel(rel1) ||
436                                 restriction_is_constant_false(restrictlist, true))
437                         {
438                                 mark_dummy_rel(joinrel);
439                                 break;
440                         }
441                         if (restriction_is_constant_false(restrictlist, false) &&
442                                 bms_is_subset(rel2->relids, sjinfo->syn_righthand))
443                                 mark_dummy_rel(rel2);
444                         add_paths_to_joinrel(root, joinrel, rel1, rel2,
445                                                                  JOIN_ANTI, sjinfo,
446                                                                  restrictlist);
447                         break;
448                 default:
449                         /* other values not expected here */
450                         elog(ERROR, "unrecognized join type: %d", (int) sjinfo->jointype);
451                         break;
452         }
453
454         bms_free(joinrelids);
455
456         return joinrel;
457 }
458
459 /*
460  * is_dummy_rel --- has relation been proven empty?
461  *
462  * If so, it will have a single path that is dummy.
463  */
464 static bool
465 is_dummy_rel(RelOptInfo *rel)
466 {
467         return (rel->cheapest_total_path != NULL &&
468                         IS_DUMMY_PATH(rel->cheapest_total_path));
469 }
470
471
472 /*
473  * Mark a relation as proven empty.
474  *
475  * During GEQO planning, this can get invoked more than once on the same
476  * baserel struct, so it's worth checking to see if the rel is already marked
477  * dummy.
478  *
479  * Also, when called during GEQO join planning, we are in a short-lived
480  * memory context.      We must make sure that the dummy path attached to a
481  * baserel survives the GEQO cycle, else the baserel is trashed for future
482  * GEQO cycles.  On the other hand, when we are marking a joinrel during GEQO,
483  * we don't want the dummy path to clutter the main planning context.  Upshot
484  * is that the best solution is to explicitly make the dummy path in the same
485  * context the given RelOptInfo is in.
486  */
487 static void
488 mark_dummy_rel(RelOptInfo *rel)
489 {
490         MemoryContext oldcontext;
491
492         /* Already marked? */
493         if (is_dummy_rel(rel))
494                 return;
495
496         /* No, so choose correct context to make the dummy path in */
497         oldcontext = MemoryContextSwitchTo(GetMemoryChunkContext(rel));
498
499         /* Set dummy size estimate */
500         rel->rows = 0;
501
502         /* Evict any previously chosen paths */
503         rel->pathlist = NIL;
504
505         /* Set up the dummy path */
506         add_path(rel, (Path *) create_append_path(rel, NIL));
507
508         /* Set or update cheapest_total_path */
509         set_cheapest(rel);
510
511         MemoryContextSwitchTo(oldcontext);
512 }
513
514
515 /*
516  * restriction_is_constant_false --- is a restrictlist just FALSE?
517  *
518  * In cases where a qual is provably constant FALSE, eval_const_expressions
519  * will generally have thrown away anything that's ANDed with it.  In outer
520  * join situations this will leave us computing cartesian products only to
521  * decide there's no match for an outer row, which is pretty stupid.  So,
522  * we need to detect the case.
523  *
524  * If only_pushed_down is TRUE, then consider only pushed-down quals.
525  */
526 static bool
527 restriction_is_constant_false(List *restrictlist, bool only_pushed_down)
528 {
529         ListCell   *lc;
530
531         /*
532          * Despite the above comment, the restriction list we see here might
533          * possibly have other members besides the FALSE constant, since other
534          * quals could get "pushed down" to the outer join level.  So we check
535          * each member of the list.
536          */
537         foreach(lc, restrictlist)
538         {
539                 RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
540
541                 Assert(IsA(rinfo, RestrictInfo));
542                 if (only_pushed_down && !rinfo->is_pushed_down)
543                         continue;
544
545                 if (rinfo->clause && IsA(rinfo->clause, Const))
546                 {
547                         Const      *con = (Const *) rinfo->clause;
548
549                         /* constant NULL is as good as constant FALSE for our purposes */
550                         if (con->constisnull)
551                                 return true;
552                         if (!DatumGetBool(con->constvalue))
553                                 return true;
554                 }
555         }
556         return false;
557 }