From 7bd946ddabc6ac5c90b28dc0f84f0683d06a29ef Mon Sep 17 00:00:00 2001 From: Richard Guo Date: Tue, 4 Apr 2023 10:04:32 +0800 Subject: [PATCH v2] Adjust outer join's target list --- src/backend/optimizer/path/equivclass.c | 3 +- src/backend/optimizer/path/joinrels.c | 18 ++++++-- src/backend/optimizer/util/relnode.c | 59 +++++++++++++++++++++---- src/include/optimizer/pathnode.h | 1 + src/include/optimizer/paths.h | 3 +- 5 files changed, 70 insertions(+), 14 deletions(-) diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index ecb1343d1a..2db1bf6448 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -1399,7 +1399,8 @@ generate_join_implied_equalities(PlannerInfo *root, nominal_join_relids = bms_union(outer_relids, nominal_inner_relids); nominal_join_relids = add_outer_joins_to_relids(root, nominal_join_relids, - sjinfo); + sjinfo, + NULL); } else { diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c index be7865cc0a..abeaa32a7c 100644 --- a/src/backend/optimizer/path/joinrels.c +++ b/src/backend/optimizer/path/joinrels.c @@ -694,6 +694,7 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2) SpecialJoinInfo sjinfo_data; RelOptInfo *joinrel; List *restrictlist; + List *pushed_down_joins = NULL; /* We should never try to join two overlapping sets of rels. */ Assert(!bms_overlap(rel1->relids, rel2->relids)); @@ -711,7 +712,8 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2) } /* Add outer join relid(s) to form the canonical relids. */ - joinrelids = add_outer_joins_to_relids(root, joinrelids, sjinfo); + joinrelids = add_outer_joins_to_relids(root, joinrelids, sjinfo, + &pushed_down_joins); /* Swap rels if needed to match the join info. */ if (reversed) @@ -753,7 +755,7 @@ make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2) * goes with this particular joining. 
*/ joinrel = build_join_rel(root, joinrelids, rel1, rel2, sjinfo, - &restrictlist); + pushed_down_joins, &restrictlist); /* * If we've already proven this join is empty, we needn't consider any @@ -805,10 +807,13 @@ commute_below_l_is_subset(SpecialJoinInfo *sjinfo, Relids relids) * it first, if a separate value is desired. * * sjinfo represents the join being performed. + * + * pushed_down_joins if not null is used to collect all the pushed down + * outer joins. */ Relids add_outer_joins_to_relids(PlannerInfo *root, Relids input_relids, - SpecialJoinInfo *sjinfo) + SpecialJoinInfo *sjinfo, List **pushed_down_joins) { /* Nothing to do if this isn't an outer join with an assigned relid. */ if (sjinfo == NULL || sjinfo->ojrelid == 0) @@ -863,7 +868,12 @@ add_outer_joins_to_relids(PlannerInfo *root, Relids input_relids, bms_is_subset(othersj->min_lefthand, input_relids) && bms_is_subset(othersj->min_righthand, input_relids) && commute_below_l_is_subset(othersj, input_relids)) + { input_relids = bms_add_member(input_relids, othersj->ojrelid); + + if (pushed_down_joins != NULL) + *pushed_down_joins = lappend(*pushed_down_joins, othersj); + } } } @@ -1627,7 +1637,7 @@ try_partitionwise_join(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2, /* Build correct join relids for child join */ child_joinrelids = bms_union(child_rel1->relids, child_rel2->relids); child_joinrelids = add_outer_joins_to_relids(root, child_joinrelids, - child_sjinfo); + child_sjinfo, NULL); /* Find the AppendRelInfo structures */ appinfos = find_appinfos_by_relids(root, child_joinrelids, &nappinfos); diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index 0d849d9494..a38a3bb75a 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -43,6 +43,7 @@ typedef struct JoinHashEntry static void build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *input_rel, SpecialJoinInfo *sjinfo, + List 
*pushed_down_joins, bool can_null); static List *build_joinrel_restrictlist(PlannerInfo *root, RelOptInfo *joinrel, @@ -632,6 +633,7 @@ add_join_rel(PlannerInfo *root, RelOptInfo *joinrel) * 'outer_rel' and 'inner_rel' are relation nodes for the relations to be * joined * 'sjinfo': join context info + * 'pushed_down_joins': if not NULL, all the pushed down outer joins * 'restrictlist_ptr': result variable. If not NULL, *restrictlist_ptr * receives the list of RestrictInfo nodes that apply to this * particular pair of joinable relations. @@ -645,6 +647,7 @@ build_join_rel(PlannerInfo *root, RelOptInfo *outer_rel, RelOptInfo *inner_rel, SpecialJoinInfo *sjinfo, + List *pushed_down_joins, List **restrictlist_ptr) { RelOptInfo *joinrel; @@ -757,9 +760,9 @@ build_join_rel(PlannerInfo *root, * and inner rels we first try to build it from. But the contents should * be the same regardless. */ - build_joinrel_tlist(root, joinrel, outer_rel, sjinfo, + build_joinrel_tlist(root, joinrel, outer_rel, sjinfo, pushed_down_joins, (sjinfo->jointype == JOIN_FULL)); - build_joinrel_tlist(root, joinrel, inner_rel, sjinfo, + build_joinrel_tlist(root, joinrel, inner_rel, sjinfo, pushed_down_joins, (sjinfo->jointype != JOIN_INNER)); add_placeholders_to_joinrel(root, joinrel, outer_rel, inner_rel, sjinfo); @@ -870,7 +873,8 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel, joinrel->reloptkind = RELOPT_OTHER_JOINREL; joinrel->relids = bms_union(outer_rel->relids, inner_rel->relids); - joinrel->relids = add_outer_joins_to_relids(root, joinrel->relids, sjinfo); + joinrel->relids = add_outer_joins_to_relids(root, joinrel->relids, sjinfo, + NULL); joinrel->rows = 0; /* cheap startup cost is interesting iff not all tuples to be retrieved */ joinrel->consider_startup = (root->tuple_fraction > 0); @@ -1046,15 +1050,17 @@ min_join_parameterization(PlannerInfo *root, * identity 3 (see optimizer/README). 
We must take steps to ensure that * the output Vars have the same nulling bitmaps that they would if the * two joins had been done in syntactic order; else they won't match Vars - * appearing higher in the query tree. We need to do two things: + * appearing higher in the query tree. We need to do three things: * - * First, we add the outer join's relid to the nulling bitmap only if the Var - * or PHV actually comes from within the syntactically nullable side(s) of the - * outer join. This takes care of the possibility that we have transformed + * First, we add the outer join's relid to the nulling bitmap only if the + * outer join has been completely performed and the Var or PHV actually + * comes from within the syntactically nullable side(s) of the outer join. + * This takes care of the possibility that we have transformed * (A leftjoin B on (Pab)) leftjoin C on (Pbc) * to * A leftjoin (B leftjoin C on (Pbc)) on (Pab) - * Here the now-upper A/B join must not mark C columns as nulled by itself. + * Here the pushed-down B/C join cannot yet mark C columns as nulled by B/C, + * and the now-upper A/B join must not mark C columns as nulled by A/B. * * Second, any relid in sjinfo->commute_above_r that is already part of * the joinrel is added to the nulling bitmaps of nullable Vars and PHVs. @@ -1065,11 +1071,22 @@ min_join_parameterization(PlannerInfo *root, * The C columns emitted by the B/C join need to be shown as nulled by both * the B/C and A/B joins, even though they've not physically traversed the * A/B join. + * + * Third, for each pushed-down outer join that only now becomes completely + * performed, we add its relid to the nulling bitmap if the Var or PHV + * actually comes from within that join's syntactically nullable side.
+ * This takes care of the possibility that we have transformed + * (A leftjoin B on (Pab)) leftjoin C on (Pbc) + * to + * A leftjoin (B leftjoin C on (Pbc)) on (Pab) + * Here the now-upper A/B join needs to mark C columns as nulled by the + * pushed-down B/C join. */ static void build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *input_rel, SpecialJoinInfo *sjinfo, + List *pushed_down_joins, bool can_null) { Relids relids = joinrel->relids; @@ -1097,9 +1114,12 @@ build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel, */ if (can_null) { + ListCell *lc; + phv = copyObject(phv); /* See comments above to understand this logic */ if (sjinfo->ojrelid != 0 && + bms_is_member(sjinfo->ojrelid, relids) && (bms_is_subset(phv->phrels, sjinfo->syn_righthand) || (sjinfo->jointype == JOIN_FULL && bms_is_subset(phv->phrels, sjinfo->syn_lefthand)))) @@ -1109,6 +1129,16 @@ build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel, bms_join(phv->phnullingrels, bms_intersect(sjinfo->commute_above_r, relids)); + foreach(lc, pushed_down_joins) + { + SpecialJoinInfo *othersj = (SpecialJoinInfo *) lfirst(lc); + + Assert(othersj->ojrelid != 0 && othersj->jointype == JOIN_LEFT); + + if (bms_is_subset(phv->phrels, othersj->syn_righthand)) + phv->phnullingrels = bms_add_member(phv->phnullingrels, + othersj->ojrelid); + } } joinrel->reltarget->exprs = lappend(joinrel->reltarget->exprs, @@ -1162,9 +1192,12 @@ build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel, */ if (can_null && var->varno != ROWID_VAR) { + ListCell *lc; + var = copyObject(var); /* See comments above to understand this logic */ if (sjinfo->ojrelid != 0 && + bms_is_member(sjinfo->ojrelid, relids) && (bms_is_member(var->varno, sjinfo->syn_righthand) || (sjinfo->jointype == JOIN_FULL && bms_is_member(var->varno, sjinfo->syn_lefthand)))) @@ -1174,6 +1207,16 @@ build_joinrel_tlist(PlannerInfo *root, RelOptInfo *joinrel, bms_join(var->varnullingrels, bms_intersect(sjinfo->commute_above_r, relids)); 
+ foreach(lc, pushed_down_joins) + { + SpecialJoinInfo *othersj = (SpecialJoinInfo *) lfirst(lc); + + Assert(othersj->ojrelid != 0 && othersj->jointype == JOIN_LEFT); + + if (bms_is_member(var->varno, othersj->syn_righthand)) + var->varnullingrels = bms_add_member(var->varnullingrels, + othersj->ojrelid); + } } joinrel->reltarget->exprs = lappend(joinrel->reltarget->exprs, diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index 69be701b16..001e75b5b7 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -311,6 +311,7 @@ extern RelOptInfo *build_join_rel(PlannerInfo *root, RelOptInfo *outer_rel, RelOptInfo *inner_rel, SpecialJoinInfo *sjinfo, + List *pushed_down_joins, List **restrictlist_ptr); extern Relids min_join_parameterization(PlannerInfo *root, Relids joinrelids, diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index d9e1623274..50bc3b503a 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -105,7 +105,8 @@ extern void join_search_one_level(PlannerInfo *root, int level); extern RelOptInfo *make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2); extern Relids add_outer_joins_to_relids(PlannerInfo *root, Relids input_relids, - SpecialJoinInfo *sjinfo); + SpecialJoinInfo *sjinfo, + List **pushed_down_joins); extern bool have_join_order_restriction(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2); extern bool have_dangerous_phv(PlannerInfo *root, -- 2.31.0