From 2e8a79a2203eeaeb004af69f23f32025e2e29ead Mon Sep 17 00:00:00 2001 From: jian he Date: Mon, 15 Jul 2024 11:30:17 +0800 Subject: [PATCH v6 1/1] Remove useless self-joins MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit The Self Join Elimination (SJE) feature removes an inner join of a plain table to itself in the query tree if is proved that the join can be replaced with a scan without impacting the query result. Self join and inner relation are replaced with the outer in query, equivalence classes, and planner info structures. Also, inner restrictlist moves to the outer one with removing duplicated clauses. Thus, this optimization reduces the length of the range table list (this especially makes sense for partitioned relations), reduces the number of restriction clauses === selectivity estimations, and potentially can improve total planner prediction for the query. The SJE proof is based on innerrel_is_unique machinery. We can remove a self-join when for each outer row: 1. At most one inner row matches the join clause. 2. Each matched inner row must be (physically) the same row as the outer one. In this patch we use the next approach to identify a self-join: 1. Collect all merge-joinable join quals which look like a.x = b.x 2. Add to the list above the baseretrictinfo of the inner table. 3. Check innerrel_is_unique() for the qual list. If it returns false, skip this pair of joining tables. 4. Check uniqueness, proved by the baserestrictinfo clauses. To prove the possibility of self-join elimination inner and outer clauses must have an exact match. The relation replacement procedure is not trivial and it is partly combined with the one, used to remove useless left joins. Tests, covering this feature, were added to join.sql. Some regression tests changed due to self-join removal logic. Discussion: https://postgr.es/m/flat/64486b0b-0404-e39e-322d-0801154901f3%40postgrespro.ru Author: Andrey Lepikhov, Alexander Kuzmenkov Reviewed-by: Tom Lane, Robert Haas, Andres Freund, Simon Riggs, Jonathan S. Katz Reviewed-by: David Rowley, Thomas Munro, Konstantin Knizhnik, Heikki Linnakangas Reviewed-by: Hywel Carver, Laurenz Albe, Ronan Dunklau, vignesh C, Zhihong Yu Reviewed-by: Greg Stark, Jaime Casanova, Michał Kłeczek, Alena Rybakina Reviewed-by: Alexander Korotkov --- doc/src/sgml/config.sgml | 16 + src/backend/optimizer/path/equivclass.c | 3 +- src/backend/optimizer/path/indxpath.c | 39 + src/backend/optimizer/plan/analyzejoins.c | 1126 +++++++++++++++++++-- src/backend/optimizer/plan/planmain.c | 5 + src/backend/optimizer/prep/prepunion.c | 7 +- src/backend/rewrite/rewriteManip.c | 150 ++- src/backend/utils/misc/guc_tables.c | 10 + src/include/nodes/pathnodes.h | 35 +- src/include/optimizer/optimizer.h | 2 + src/include/optimizer/paths.h | 3 + src/include/optimizer/planmain.h | 6 + src/include/rewrite/rewriteManip.h | 4 + src/test/regress/expected/equivclass.out | 30 + src/test/regress/expected/join.out | 1064 +++++++++++++++++++ src/test/regress/expected/sysviews.out | 3 +- src/test/regress/sql/equivclass.sql | 16 + src/test/regress/sql/join.sql | 486 +++++++++ src/tools/pgindent/typedefs.list | 3 + 19 files changed, 2924 insertions(+), 84 deletions(-) diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index b14c5d81..27ec59c6 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -5612,6 +5612,22 @@ ANY num_sync ( + enable_self_join_removal (boolean) + + enable_self_join_removal configuration parameter + + + + + Enables or disables the query planner's optimization which analyses + the query tree and replaces self joins with semantically equivalent + single scans. Takes into consideration only plain tables. + The default is on. + + + + enable_seqscan (boolean) diff --git a/src/backend/optimizer/path/equivclass.c b/src/backend/optimizer/path/equivclass.c index 51d80632..b1863726 100644 --- a/src/backend/optimizer/path/equivclass.c +++ b/src/backend/optimizer/path/equivclass.c @@ -856,7 +856,8 @@ find_computable_ec_member(PlannerInfo *root, exprvars = pull_var_clause((Node *) em->em_expr, PVC_INCLUDE_AGGREGATES | PVC_INCLUDE_WINDOWFUNCS | - PVC_INCLUDE_PLACEHOLDERS); + PVC_INCLUDE_PLACEHOLDERS | + PVC_INCLUDE_CONVERTROWTYPES); foreach(lc2, exprvars) { if (!is_exprlist_member(lfirst(lc2), exprs)) diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c index c0fcc7d7..2230b131 100644 --- a/src/backend/optimizer/path/indxpath.c +++ b/src/backend/optimizer/path/indxpath.c @@ -3440,6 +3440,22 @@ bool relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel, List *restrictlist, List *exprlist, List *oprlist) +{ + return relation_has_unique_index_ext(root, rel, restrictlist, + exprlist, oprlist, NULL); +} + +/* + * relation_has_unique_index_ext + * Same as relation_has_unique_index_for(), but supports extra_clauses + * parameter. If extra_clauses isn't NULL, return baserestrictinfo clauses + * which were used to derive uniqueness. + */ +bool +relation_has_unique_index_ext(PlannerInfo *root, RelOptInfo *rel, + List *restrictlist, + List *exprlist, List *oprlist, + List **extra_clauses) { ListCell *ic; @@ -3495,6 +3511,7 @@ relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel, { IndexOptInfo *ind = (IndexOptInfo *) lfirst(ic); int c; + List *exprs = NIL; /* * If the index is not unique, or not immediately enforced, or if it's @@ -3546,6 +3563,24 @@ relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel, if (match_index_to_operand(rexpr, c, ind)) { matched = true; /* column is unique */ + + if (bms_membership(rinfo->clause_relids) == BMS_SINGLETON) + { + MemoryContext oldMemCtx = + MemoryContextSwitchTo(root->planner_cxt); + + /* + * Add filter clause into a list allowing caller to + * know if uniqueness have made not only by join + * clauses. + */ + Assert(bms_is_empty(rinfo->left_relids) || + bms_is_empty(rinfo->right_relids)); + if (extra_clauses) + exprs = lappend(exprs, rinfo); + MemoryContextSwitchTo(oldMemCtx); + } + break; } } @@ -3588,7 +3623,11 @@ relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel, /* Matched all key columns of this index? */ if (c == ind->nkeycolumns) + { + if (extra_clauses) + *extra_clauses = exprs; return true; + } } return false; diff --git a/src/backend/optimizer/plan/analyzejoins.c b/src/backend/optimizer/plan/analyzejoins.c index c3fd4a81..89b194a3 100644 --- a/src/backend/optimizer/plan/analyzejoins.c +++ b/src/backend/optimizer/plan/analyzejoins.c @@ -22,6 +22,7 @@ */ #include "postgres.h" +#include "catalog/pg_class.h" #include "nodes/nodeFuncs.h" #include "optimizer/joininfo.h" #include "optimizer/optimizer.h" @@ -29,12 +30,31 @@ #include "optimizer/paths.h" #include "optimizer/planmain.h" #include "optimizer/restrictinfo.h" +#include "rewrite/rewriteManip.h" #include "utils/lsyscache.h" +/* + * Utility structure. It is needed to perform a sorting procedure and simplify + * the search of SJE-candidate baserels referencing the same database relation. + * Having collected all baserels from the query jointree, the planner sorts them + * according to the reloid value, groups them with the next pass and attempts to + * remove self-joins. + * Preliminary sorting prevents quadratic behaviour that can be harmful in the + * case of numerous joins. + */ +typedef struct +{ + int relid; + Oid reloid; +} SelfJoinCandidate; + +bool enable_self_join_removal; + /* local functions */ static bool join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo); -static void remove_rel_from_query(PlannerInfo *root, int relid, - SpecialJoinInfo *sjinfo); + +static void remove_leftjoinrel_from_query(PlannerInfo *root, int relid, + SpecialJoinInfo *sjinfo); static void remove_rel_from_restrictinfo(RestrictInfo *rinfo, int relid, int ojrelid); static void remove_rel_from_eclass(EquivalenceClass *ec, @@ -42,14 +62,16 @@ static void remove_rel_from_eclass(EquivalenceClass *ec, static List *remove_rel_from_joinlist(List *joinlist, int relid, int *nremoved); static bool rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel); static bool rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, - List *clause_list); + List *clause_list, List **extra_clauses); static Oid distinct_col_search(int colno, List *colnos, List *opids); static bool is_innerrel_unique_for(PlannerInfo *root, Relids joinrelids, Relids outerrelids, RelOptInfo *innerrel, JoinType jointype, - List *restrictlist); + List *restrictlist, + List **extra_clauses); +static int self_join_candidates_cmp(const void *a, const void *b); /* @@ -87,7 +109,7 @@ restart: */ innerrelid = bms_singleton_member(sjinfo->min_righthand); - remove_rel_from_query(root, innerrelid, sjinfo); + remove_leftjoinrel_from_query(root, innerrelid, sjinfo); /* We verify that exactly one reference gets removed from joinlist */ nremoved = 0; @@ -291,8 +313,8 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) continue; /* not mergejoinable */ /* - * Check if the clause has the form "outer op inner" or "inner op - * outer", and if so mark which side is inner. + * Check if clause has the form "outer op inner" or "inner op outer", + * and if so mark which side is inner. */ if (!clause_sides_match_join(restrictinfo, sjinfo->min_lefthand, innerrel->relids)) @@ -306,7 +328,7 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) * Now that we have the relevant equality join clauses, try to prove the * innerrel distinct. */ - if (rel_is_distinct_for(root, innerrel, clause_list)) + if (rel_is_distinct_for(root, innerrel, clause_list, NULL)) return true; /* @@ -318,31 +340,27 @@ join_is_removable(PlannerInfo *root, SpecialJoinInfo *sjinfo) /* - * Remove the target relid and references to the target join from the + * Remove the target rel->relid and references to the target join from the * planner's data structures, having determined that there is no need - * to include them in the query. + * to include them in the query. Optionally replace them with subst if subst + * is non-negative. * - * We are not terribly thorough here. We only bother to update parts of - * the planner's data structures that will actually be consulted later. + * This function updates only parts needed for both left-join removal and + * self-join removal. */ static void -remove_rel_from_query(PlannerInfo *root, int relid, SpecialJoinInfo *sjinfo) +remove_rel_from_query(PlannerInfo *root, RelOptInfo *rel, + int subst, SpecialJoinInfo *sjinfo, + Relids joinrelids) { - RelOptInfo *rel = find_base_rel(root, relid); - int ojrelid = sjinfo->ojrelid; - Relids joinrelids; - Relids join_plus_commute; - List *joininfos; + int relid = rel->relid; + int ojrelid = (sjinfo != NULL) ? sjinfo->ojrelid : -1; Index rti; ListCell *l; - /* Compute the relid set for the join we are considering */ - joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand); - Assert(ojrelid != 0); - joinrelids = bms_add_member(joinrelids, ojrelid); - /* - * Remove references to the rel from other baserels' attr_needed arrays. + * Remove references to the rel from other baserels' attr_needed arrays + * and lateral_vars lists. */ for (rti = 1; rti < root->simple_rel_array_size; rti++) { @@ -364,19 +382,23 @@ remove_rel_from_query(PlannerInfo *root, int relid, SpecialJoinInfo *sjinfo) attroff--) { otherrel->attr_needed[attroff] = - bms_del_member(otherrel->attr_needed[attroff], relid); + replace_relid(otherrel->attr_needed[attroff], relid, subst); otherrel->attr_needed[attroff] = - bms_del_member(otherrel->attr_needed[attroff], ojrelid); + replace_relid(otherrel->attr_needed[attroff], ojrelid, subst); } + + /* Update lateral_vars list. */ + if (subst > 0) + ChangeVarNodes((Node *) otherrel->lateral_vars, relid, subst, 0); } /* * Update all_baserels and related relid sets. */ - root->all_baserels = bms_del_member(root->all_baserels, relid); - root->outer_join_rels = bms_del_member(root->outer_join_rels, ojrelid); - root->all_query_rels = bms_del_member(root->all_query_rels, relid); - root->all_query_rels = bms_del_member(root->all_query_rels, ojrelid); + root->all_baserels = replace_relid(root->all_baserels, relid, subst); + root->outer_join_rels = replace_relid(root->outer_join_rels, ojrelid, subst); + root->all_query_rels = replace_relid(root->all_query_rels, relid, subst); + root->all_query_rels = replace_relid(root->all_query_rels, ojrelid, subst); /* * Likewise remove references from SpecialJoinInfo data structures. @@ -390,30 +412,23 @@ remove_rel_from_query(PlannerInfo *root, int relid, SpecialJoinInfo *sjinfo) { SpecialJoinInfo *sjinf = (SpecialJoinInfo *) lfirst(l); - /* - * initsplan.c is fairly cavalier about allowing SpecialJoinInfos' - * lefthand/righthand relid sets to be shared with other data - * structures. Ensure that we don't modify the original relid sets. - * (The commute_xxx sets are always per-SpecialJoinInfo though.) - */ - sjinf->min_lefthand = bms_copy(sjinf->min_lefthand); - sjinf->min_righthand = bms_copy(sjinf->min_righthand); - sjinf->syn_lefthand = bms_copy(sjinf->syn_lefthand); - sjinf->syn_righthand = bms_copy(sjinf->syn_righthand); - /* Now remove relid and ojrelid bits from the sets: */ - sjinf->min_lefthand = bms_del_member(sjinf->min_lefthand, relid); - sjinf->min_righthand = bms_del_member(sjinf->min_righthand, relid); - sjinf->syn_lefthand = bms_del_member(sjinf->syn_lefthand, relid); - sjinf->syn_righthand = bms_del_member(sjinf->syn_righthand, relid); - sjinf->min_lefthand = bms_del_member(sjinf->min_lefthand, ojrelid); - sjinf->min_righthand = bms_del_member(sjinf->min_righthand, ojrelid); - sjinf->syn_lefthand = bms_del_member(sjinf->syn_lefthand, ojrelid); - sjinf->syn_righthand = bms_del_member(sjinf->syn_righthand, ojrelid); + sjinf->min_lefthand = replace_relid(sjinf->min_lefthand, relid, subst); + sjinf->min_righthand = replace_relid(sjinf->min_righthand, relid, subst); + sjinf->syn_lefthand = replace_relid(sjinf->syn_lefthand, relid, subst); + sjinf->syn_righthand = replace_relid(sjinf->syn_righthand, relid, subst); + sjinf->min_lefthand = replace_relid(sjinf->min_lefthand, ojrelid, subst); + sjinf->min_righthand = replace_relid(sjinf->min_righthand, ojrelid, subst); + sjinf->syn_lefthand = replace_relid(sjinf->syn_lefthand, ojrelid, subst); + sjinf->syn_righthand = replace_relid(sjinf->syn_righthand, ojrelid, subst); + /* relid cannot appear in these fields, but ojrelid can: */ - sjinf->commute_above_l = bms_del_member(sjinf->commute_above_l, ojrelid); - sjinf->commute_above_r = bms_del_member(sjinf->commute_above_r, ojrelid); - sjinf->commute_below_l = bms_del_member(sjinf->commute_below_l, ojrelid); - sjinf->commute_below_r = bms_del_member(sjinf->commute_below_r, ojrelid); + sjinf->commute_above_l = replace_relid(sjinf->commute_above_l, ojrelid, subst); + sjinf->commute_above_r = replace_relid(sjinf->commute_above_r, ojrelid, subst); + sjinf->commute_below_l = replace_relid(sjinf->commute_below_l, ojrelid, subst); + sjinf->commute_below_r = replace_relid(sjinf->commute_below_r, ojrelid, subst); + + if (subst > 0) + ChangeVarNodes((Node *) sjinf->semi_rhs_exprs, relid, subst, 0); } /* @@ -434,10 +449,10 @@ remove_rel_from_query(PlannerInfo *root, int relid, SpecialJoinInfo *sjinfo) { PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l); - Assert(!bms_is_member(relid, phinfo->ph_lateral)); + Assert(sjinfo == NULL || !bms_is_member(relid, phinfo->ph_lateral)); if (bms_is_subset(phinfo->ph_needed, joinrelids) && bms_is_member(relid, phinfo->ph_eval_at) && - !bms_is_member(ojrelid, phinfo->ph_eval_at)) + (sjinfo == NULL || !bms_is_member(ojrelid, phinfo->ph_eval_at))) { root->placeholder_list = foreach_delete_current(root->placeholder_list, l); @@ -447,18 +462,58 @@ remove_rel_from_query(PlannerInfo *root, int relid, SpecialJoinInfo *sjinfo) { PlaceHolderVar *phv = phinfo->ph_var; - phinfo->ph_eval_at = bms_del_member(phinfo->ph_eval_at, relid); - phinfo->ph_eval_at = bms_del_member(phinfo->ph_eval_at, ojrelid); + phinfo->ph_eval_at = replace_relid(phinfo->ph_eval_at, relid, subst); + phinfo->ph_eval_at = replace_relid(phinfo->ph_eval_at, ojrelid, subst); Assert(!bms_is_empty(phinfo->ph_eval_at)); /* checked previously */ - phinfo->ph_needed = bms_del_member(phinfo->ph_needed, relid); - phinfo->ph_needed = bms_del_member(phinfo->ph_needed, ojrelid); + + phinfo->ph_needed = replace_relid(phinfo->ph_needed, relid, subst); + phinfo->ph_needed = replace_relid(phinfo->ph_needed, ojrelid, subst); /* ph_needed might or might not become empty */ - phv->phrels = bms_del_member(phv->phrels, relid); - phv->phrels = bms_del_member(phv->phrels, ojrelid); + + phinfo->ph_lateral = replace_relid(phinfo->ph_lateral, relid, subst); + /* + * ph_lateral might contain rels mentioned in ph_eval_at after the + * replacement, remove them. + */ + phinfo->ph_lateral = bms_difference(phinfo->ph_lateral, phinfo->ph_eval_at); + /* ph_lateral might or might not be empty */ + + phv->phrels = replace_relid(phv->phrels, relid, subst); + phv->phrels = replace_relid(phv->phrels, ojrelid, subst); Assert(!bms_is_empty(phv->phrels)); + + ChangeVarNodes((Node *) phv->phexpr, relid, subst, 0); + Assert(phv->phnullingrels == NULL); /* no need to adjust */ } } +} + +/* + * Remove the target relid and references to the target join from the + * planner's data structures, having determined that there is no need + * to include them in the query. + * + * We are not terribly thorough here. We only bother to update parts of + * the planner's data structures that will actually be consulted later. + */ +static void +remove_leftjoinrel_from_query(PlannerInfo *root, int relid, + SpecialJoinInfo *sjinfo) +{ + List *joininfos; + int ojrelid = sjinfo->ojrelid; + RelOptInfo *rel = find_base_rel(root, relid); + Relids join_plus_commute; + Relids joinrelids; + ListCell *l; + + /* Compute the relid set for the join we are considering */ + joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand); + Assert(ojrelid != 0); + joinrelids = bms_add_member(joinrelids, ojrelid); + + remove_rel_from_query(root, rel, -1, sjinfo, joinrelids); /* * Remove any joinquals referencing the rel from the joininfo lists. @@ -856,9 +911,14 @@ rel_supports_distinctness(PlannerInfo *root, RelOptInfo *rel) * Note that the passed-in clause_list may be destructively modified! This * is OK for current uses, because the clause_list is built by the caller for * the sole purpose of passing to this function. + * + * extra_clauses contains the right sides of baserestrictinfo clauses looking + * like x = const if distinctness is derived from such clauses, not joininfo + * clause. Pass NULL to the extra_clauses, if its value is not needed. */ static bool -rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list) +rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list, + List **extra_clauses) { /* * We could skip a couple of tests here if we assume all callers checked @@ -871,10 +931,11 @@ rel_is_distinct_for(PlannerInfo *root, RelOptInfo *rel, List *clause_list) { /* * Examine the indexes to see if we have a matching unique index. - * relation_has_unique_index_for automatically adds any usable + * relation_has_unique_index_ext automatically adds any usable * restriction clauses for the rel, so we needn't do that here. */ - if (relation_has_unique_index_for(root, rel, clause_list, NIL, NIL)) + if (relation_has_unique_index_ext(root, rel, clause_list, NIL, NIL, + extra_clauses)) return true; } else if (rel->rtekind == RTE_SUBQUERY) @@ -1188,9 +1249,33 @@ innerrel_is_unique(PlannerInfo *root, JoinType jointype, List *restrictlist, bool force_cache) +{ + return innerrel_is_unique_ext(root, joinrelids, outerrelids, innerrel, + jointype, restrictlist, force_cache, NULL); +} + +/* + * innerrel_is_unique_ext + * Do the same as innerrel_is_unique(), but also set to '*extra_clauses' + * additional clauses from a baserestrictinfo list that were used to prove + * uniqueness. A non NULL 'extra_clauses' indicates that we're checking + * for self-join and correspondingly dealing with filtered clauses. + */ +bool +innerrel_is_unique_ext(PlannerInfo *root, + Relids joinrelids, + Relids outerrelids, + RelOptInfo *innerrel, + JoinType jointype, + List *restrictlist, + bool force_cache, + List **extra_clauses) { MemoryContext old_context; ListCell *lc; + UniqueRelInfo *uniqueRelInfo; + List *outer_exprs = NIL; + bool self_join = (extra_clauses != NULL); /* Certainly can't prove uniqueness when there are no joinclauses */ if (restrictlist == NIL) @@ -1205,17 +1290,28 @@ innerrel_is_unique(PlannerInfo *root, /* * Query the cache to see if we've managed to prove that innerrel is - * unique for any subset of this outerrel. We don't need an exact match, - * as extra outerrels can't make the innerrel any less unique (or more - * formally, the restrictlist for a join to a superset outerrel must be a - * superset of the conditions we successfully used before). + * unique for any subset of this outerrel. For non self-join search, we + * don't need an exact match, as extra outerrels can't make the innerrel + * any less unique (or more formally, the restrictlist for a join to a + * superset outerrel must be a superset of the conditions we successfully + * used before). For self-join search, we require an exact match of + * outerrels, because we need extra clauses to be valid for our case. + * Also, for self-join checking we've filtered the clauses list. Thus, + * for a self-join search, we can match only the result cached for another + * self-join check. */ foreach(lc, innerrel->unique_for_rels) { - Relids unique_for_rels = (Relids) lfirst(lc); + uniqueRelInfo = (UniqueRelInfo *) lfirst(lc); - if (bms_is_subset(unique_for_rels, outerrelids)) + if ((!self_join && bms_is_subset(uniqueRelInfo->outerrelids, outerrelids)) || + (self_join && bms_equal(uniqueRelInfo->outerrelids, outerrelids) && + uniqueRelInfo->self_join)) + { + if (extra_clauses) + *extra_clauses = uniqueRelInfo->extra_clauses; return true; /* Success! */ + } } /* @@ -1232,7 +1328,8 @@ innerrel_is_unique(PlannerInfo *root, /* No cached information, so try to make the proof. */ if (is_innerrel_unique_for(root, joinrelids, outerrelids, innerrel, - jointype, restrictlist)) + jointype, restrictlist, + self_join ? &outer_exprs : NULL)) { /* * Cache the positive result for future probes, being sure to keep it @@ -1245,10 +1342,16 @@ innerrel_is_unique(PlannerInfo *root, * supersets of them anyway. */ old_context = MemoryContextSwitchTo(root->planner_cxt); + uniqueRelInfo = makeNode(UniqueRelInfo); + uniqueRelInfo->outerrelids = bms_copy(outerrelids); + uniqueRelInfo->self_join = self_join; + uniqueRelInfo->extra_clauses = outer_exprs; innerrel->unique_for_rels = lappend(innerrel->unique_for_rels, - bms_copy(outerrelids)); + uniqueRelInfo); MemoryContextSwitchTo(old_context); + if (extra_clauses) + *extra_clauses = outer_exprs; return true; /* Success! */ } else @@ -1294,7 +1397,8 @@ is_innerrel_unique_for(PlannerInfo *root, Relids outerrelids, RelOptInfo *innerrel, JoinType jointype, - List *restrictlist) + List *restrictlist, + List **extra_clauses) { List *clause_list = NIL; ListCell *lc; @@ -1324,17 +1428,885 @@ is_innerrel_unique_for(PlannerInfo *root, continue; /* not mergejoinable */ /* - * Check if clause has the form "outer op inner" or "inner op outer", - * and if so mark which side is inner. + * Check if the clause has the form "outer op inner" or "inner op + * outer", and if so mark which side is inner. */ if (!clause_sides_match_join(restrictinfo, outerrelids, innerrel->relids)) continue; /* no good for these input relations */ - /* OK, add to list */ + /* OK, add to the list */ clause_list = lappend(clause_list, restrictinfo); } /* Let rel_is_distinct_for() do the hard work */ - return rel_is_distinct_for(root, innerrel, clause_list); + return rel_is_distinct_for(root, innerrel, clause_list, extra_clauses); +} + +/* + * Update EC members to point to the remaining relation instead of the removed + * one, removing duplicates. + * + * Restriction clauses for base relations are already distributed to + * the respective baserestrictinfo lists (see + * generate_implied_equalities_for_column). The above code has already processed + * this list, and updated these clauses to reference the remaining + * relation, so we can skip them here based on their relids. + * + * Likewise, we have already processed the join clauses that join the + * removed relation to the remaining one. + * + * Finally, there are join clauses that join the removed relation to + * some third relation. We can't just delete the source clauses and + * regenerate them from the EC because the corresponding equality + * operators might be missing (see the handling of ec_broken). + * Therefore, we will update the references in the source clauses. + * + * Derived clauses can be generated again, so it is simpler to just + * delete them. + */ +static void +update_eclasses(EquivalenceClass *ec, int from, int to) +{ + List *new_members = NIL; + List *new_sources = NIL; + + foreach_node(EquivalenceMember, em, ec->ec_members) + { + bool is_redundant = false; + + if (!bms_is_member(from, em->em_relids)) + { + new_members = lappend(new_members, em); + continue; + } + + em->em_relids = replace_relid(em->em_relids, from, to); + em->em_jdomain->jd_relids = replace_relid(em->em_jdomain->jd_relids, from, to); + + /* We only process inner joins */ + ChangeVarNodes((Node *) em->em_expr, from, to, 0); + + foreach_node(EquivalenceMember, other, new_members) + { + if (!equal(em->em_relids, other->em_relids)) + continue; + + if (equal(em->em_expr, other->em_expr)) + { + is_redundant = true; + break; + } + } + + if (!is_redundant) + new_members = lappend(new_members, em); + } + + list_free(ec->ec_members); + ec->ec_members = new_members; + + list_free(ec->ec_derives); + ec->ec_derives = NULL; + + /* Update EC source expressions */ + foreach_node(RestrictInfo, rinfo, ec->ec_sources) + { + bool is_redundant = false; + + if (!bms_is_member(from, rinfo->required_relids)) + { + new_sources = lappend(new_sources, rinfo); + continue; + } + + ChangeVarNodes((Node *) rinfo, from, to, 0); + + /* + * After switching the clause to the remaining relation, check it for + * redundancy with existing ones. We don't have to check for + * redundancy with derived clauses, because we've just deleted them. + */ + foreach_node(RestrictInfo, other, new_sources) + { + if (!equal(rinfo->clause_relids, other->clause_relids)) + continue; + + if (equal(rinfo->clause, other->clause)) + { + is_redundant = true; + break; + } + } + + if (!is_redundant) + new_sources = lappend(new_sources, rinfo); + } + + list_free(ec->ec_sources); + ec->ec_sources = new_sources; + ec->ec_relids = replace_relid(ec->ec_relids, from, to); +} + +/* + * "Logically" compares two RestrictInfo's ignoring the 'rinfo_serial' field, + * which makes almost every RestrictInfo unique. This type of comparison is + * useful when removing duplicates while moving RestrictInfo's from removed + * relation to remaining relation during self-join elimination. + * + * XXX: In the future, we might remove the 'rinfo_serial' field completely and + * get rid of this function. + */ +static bool +restrict_infos_logically_equal(RestrictInfo *a, RestrictInfo *b) +{ + int saved_rinfo_serial = a->rinfo_serial; + bool result; + + a->rinfo_serial = b->rinfo_serial; + result = equal(a, b); + a->rinfo_serial = saved_rinfo_serial; + + return result; +} + +/* + * Remove a relation after we have proven that it participates only in an + * unneeded unique self join. + * + * Replace any links in planner info structures. + * + * Transfer join and restriction clauses from the removed relation to the + * remaining one. We change the Vars of the clause to point to the + * remaining relation instead of the removed one. The clauses that require + * a subset of joinrelids become restriction clauses of the remaining + * relation, and others remain join clauses. We append them to + * baserestrictinfo and joininfo respectively, trying not to introduce + * duplicates. + * + * We also have to process the 'joinclauses' list here, because it + * contains EC-derived join clauses which must become filter clauses. It + * is not enough to just correct the ECs because the EC-derived + * restrictions are generated before join removal (see + * generate_base_implied_equalities). + */ +static void +remove_self_join_rel(PlannerInfo *root, PlanRowMark *kmark, PlanRowMark *rmark, + RelOptInfo *toKeep, RelOptInfo *toRemove, + List *restrictlist) +{ + List *joininfos; + ListCell *lc; + int i; + List *jinfo_candidates = NIL; + List *binfo_candidates = NIL; + + Assert(toKeep->relid > 0); + Assert(toRemove->relid > 0); + + /* + * Replace the index of the removing table with the keeping one. The + * technique of removing/distributing restrictinfo is used here to attach + * just appeared (for keeping relation) join clauses and avoid adding + * duplicates of those that already exist in the joininfo list. + */ + joininfos = list_copy(toRemove->joininfo); + foreach_node(RestrictInfo, rinfo, joininfos) + { + remove_join_clause_from_rels(root, rinfo, rinfo->required_relids); + ChangeVarNodes((Node *) rinfo, toRemove->relid, toKeep->relid, 0); + + if (bms_membership(rinfo->required_relids) == BMS_MULTIPLE) + jinfo_candidates = lappend(jinfo_candidates, rinfo); + else + binfo_candidates = lappend(binfo_candidates, rinfo); + } + + /* + * Concatenate restrictlist to the list of base restrictions of the + * removing table just to simplify the replacement procedure: all of them + * weren't connected to any keeping relations and need to be added to some + * rels. + */ + toRemove->baserestrictinfo = list_concat(toRemove->baserestrictinfo, + restrictlist); + foreach_node(RestrictInfo, rinfo, toRemove->baserestrictinfo) + { + ChangeVarNodes((Node *) rinfo, toRemove->relid, toKeep->relid, 0); + + if (bms_membership(rinfo->required_relids) == BMS_MULTIPLE) + jinfo_candidates = lappend(jinfo_candidates, rinfo); + else + binfo_candidates = lappend(binfo_candidates, rinfo); + } + + /* + * Now, add all non-redundant clauses to the keeping relation. + * Contradictory operation. On the one side, we reduce the length of + * restrict lists that can impact planning or executing time. + * Additionally, we improve the accuracy of cardinality estimation. On the + * other side, it is one more place that can make planning time much + * longer in specific cases. It would have been better to avoid calling + * the equal() function here, but it's the only way to detect duplicated + * inequality expressions. + */ + foreach_node(RestrictInfo, rinfo, binfo_candidates) + { + bool is_redundant = false; + + Assert(!bms_is_member(toRemove->relid, rinfo->required_relids)); + + foreach_node(RestrictInfo, src, toKeep->baserestrictinfo) + { + if (!bms_equal(src->clause_relids, rinfo->clause_relids)) + /* Const and non-const expressions can't be equal */ + continue; + + if (src == rinfo || + (rinfo->parent_ec != NULL + && src->parent_ec == rinfo->parent_ec) + || restrict_infos_logically_equal(rinfo, src)) + { + is_redundant = true; + break; + } + } + if (!is_redundant) + distribute_restrictinfo_to_rels(root, rinfo); + } + foreach_node(RestrictInfo, rinfo, jinfo_candidates) + { + bool is_redundant = false; + + Assert(!bms_is_member(toRemove->relid, rinfo->required_relids)); + + foreach_node(RestrictInfo, src, toKeep->joininfo) + { + if (!bms_equal(src->clause_relids, rinfo->clause_relids)) + /* Can't compare trivially different clauses */ + continue; + + if (src == rinfo || + (rinfo->parent_ec != NULL + && src->parent_ec == rinfo->parent_ec) + || restrict_infos_logically_equal(rinfo, src)) + { + is_redundant = true; + break; + } + } + if (!is_redundant) + distribute_restrictinfo_to_rels(root, rinfo); + } + list_free(binfo_candidates); + list_free(jinfo_candidates); + + /* + * Arrange equivalence classes, mentioned removing a table, with the + * keeping one: varno of removing table should be replaced in members and + * sources lists. Also, remove duplicated elements if this replacement + * procedure created them. + */ + i = -1; + while ((i = bms_next_member(toRemove->eclass_indexes, i)) >= 0) + { + EquivalenceClass *ec = (EquivalenceClass *) list_nth(root->eq_classes, i); + + update_eclasses(ec, toRemove->relid, toKeep->relid); + toKeep->eclass_indexes = bms_add_member(toKeep->eclass_indexes, i); + } + + /* + * Transfer the targetlist and attr_needed flags. + */ + + foreach(lc, toRemove->reltarget->exprs) + { + Node *node = lfirst(lc); + ChangeVarNodes(node, toRemove->relid, toKeep->relid, 0); + if (!list_member(toKeep->reltarget->exprs, node)) + toKeep->reltarget->exprs = lappend(toKeep->reltarget->exprs, node); + } + + for (i = toKeep->min_attr; i <= toKeep->max_attr; i++) + { + int attno = i - toKeep->min_attr; + + toRemove->attr_needed[attno] = replace_relid(toRemove->attr_needed[attno], + toRemove->relid, toKeep->relid); + toKeep->attr_needed[attno] = bms_add_members(toKeep->attr_needed[attno], + toRemove->attr_needed[attno]); + } + + /* + * If the removed relation has a row mark, transfer it to the remaining + * one. + * + * If both rels have row marks, just keep the one corresponding to the + * remaining relation, because we verified earlier that they have the same + * strength. + */ + if (rmark) + { + if (kmark) + { + Assert(kmark->markType == rmark->markType); + + root->rowMarks = list_delete_ptr(root->rowMarks, rmark); + } + else + { + /* Shouldn't have inheritance children here. */ + Assert(rmark->rti == rmark->prti); + + rmark->rti = rmark->prti = toKeep->relid; + } + } + + /* Replace varno in all the query structures, except nodes RangeTblRef + * otherwise later remove_rel_from_joinlist will yield errors. + */ + ChangeVarNodesExtended((Node *) root->parse, toRemove->relid, toKeep->relid, 0, false); + + /* Replace links in the planner info */ + remove_rel_from_query(root, toRemove, toKeep->relid, NULL, NULL); + + /* At last, replace varno in root targetlist and HAVING clause */ + ChangeVarNodes((Node *) root->processed_tlist, toRemove->relid, toKeep->relid, 0); + ChangeVarNodes((Node *) root->processed_groupClause, toRemove->relid, toKeep->relid, 0); + + replace_relid(root->all_result_relids, toRemove->relid, toKeep->relid); + replace_relid(root->leaf_result_relids, toRemove->relid, toKeep->relid); + + + /* + * There may be references to the rel in root->fkey_list, but if so, + * match_foreign_keys_to_quals() will get rid of them. + */ + + /* + * Finally, remove the rel from the baserel array to prevent it from being + * referenced again. (We can't do this earlier because + * remove_join_clause_from_rels will touch it.) + */ + root->simple_rel_array[toRemove->relid] = NULL; + + /* And nuke the RelOptInfo, just in case there's another access path */ + pfree(toRemove); +} + +/* + * split_selfjoin_quals + * Processes 'joinquals' by building two lists: one containing the quals + * where the columns/exprs are on either side of the join match, and + * another one containing the remaining quals. + * + * 'joinquals' must only contain quals for a RTE_RELATION being joined to + * itself. + */ +static void +split_selfjoin_quals(PlannerInfo *root, List *joinquals, List **selfjoinquals, + List **otherjoinquals, int from, int to) +{ + List *sjoinquals = NIL; + List *ojoinquals = NIL; + + foreach_node(RestrictInfo, rinfo, joinquals) + { + OpExpr *expr; + Node *leftexpr; + Node *rightexpr; + + /* In general, clause looks like F(arg1) = G(arg2) */ + if (!rinfo->mergeopfamilies || + bms_num_members(rinfo->clause_relids) != 2 || + bms_membership(rinfo->left_relids) != BMS_SINGLETON || + bms_membership(rinfo->right_relids) != BMS_SINGLETON) + { + ojoinquals = lappend(ojoinquals, rinfo); + continue; + } + + expr = (OpExpr *) rinfo->clause; + + if (!IsA(expr, OpExpr) || list_length(expr->args) != 2) + { + ojoinquals = lappend(ojoinquals, rinfo); + continue; + } + + leftexpr = get_leftop(rinfo->clause); + rightexpr = copyObject(get_rightop(rinfo->clause)); + + if (leftexpr && IsA(leftexpr, RelabelType)) + leftexpr = (Node *) ((RelabelType *) leftexpr)->arg; + if (rightexpr && IsA(rightexpr, RelabelType)) + rightexpr = (Node *) ((RelabelType *) rightexpr)->arg; + + /* + * Quite an expensive operation, narrowing the use case. For example, + * when we have cast of the same var to different (but compatible) + * types. + */ + ChangeVarNodes(rightexpr, bms_singleton_member(rinfo->right_relids), + bms_singleton_member(rinfo->left_relids), 0); + + if (equal(leftexpr, rightexpr)) + sjoinquals = lappend(sjoinquals, rinfo); + else + ojoinquals = lappend(ojoinquals, rinfo); + } + + *selfjoinquals = sjoinquals; + *otherjoinquals = ojoinquals; +} + +/* + * Check for a case when uniqueness is at least partly derived from a + * baserestrictinfo clause. In this case, we have a chance to return only + * one row (if such clauses on both sides of SJ are equal) or nothing (if they + * are different). + */ +static bool +match_unique_clauses(PlannerInfo *root, RelOptInfo *outer, List *uclauses, + Index relid) +{ + foreach_node(RestrictInfo, rinfo, uclauses) + { + Expr *clause; + Node *iclause; + Node *c1; + bool matched = false; + + Assert(outer->relid > 0 && relid > 0); + + /* Only filters like f(R.x1,...,R.xN) == expr we should consider. */ + Assert(bms_is_empty(rinfo->left_relids) ^ + bms_is_empty(rinfo->right_relids)); + + clause = (Expr *) copyObject(rinfo->clause); + ChangeVarNodes((Node *) clause, relid, outer->relid, 0); + + iclause = bms_is_empty(rinfo->left_relids) ? get_rightop(clause) : + get_leftop(clause); + c1 = bms_is_empty(rinfo->left_relids) ? get_leftop(clause) : + get_rightop(clause); + + /* + * Compare these left and right sides with the corresponding sides of + * the outer's filters. If no one is detected - return immediately. + */ + foreach_node(RestrictInfo, orinfo, outer->baserestrictinfo) + { + Node *oclause; + Node *c2; + + if (orinfo->mergeopfamilies == NIL) + /* Don't consider clauses which aren't similar to 'F(X)=G(Y)' */ + continue; + + Assert(is_opclause(orinfo->clause)); + + oclause = bms_is_empty(orinfo->left_relids) ? + get_rightop(orinfo->clause) : get_leftop(orinfo->clause); + c2 = (bms_is_empty(orinfo->left_relids) ? + get_leftop(orinfo->clause) : get_rightop(orinfo->clause)); + + if (equal(iclause, oclause) && equal(c1, c2)) + { + matched = true; + break; + } + } + + if (!matched) + return false; + } + + return true; +} + +/* + * Find and remove unique self joins in a group of base relations that have + * the same Oid. + * + * Returns a set of relids that were removed. + */ +static Relids +remove_self_joins_one_group(PlannerInfo *root, Relids relids) +{ + Relids result = NULL; + int k; /* Index of kept relation */ + int r = -1; /* Index of removed relation */ + + while ((r = bms_next_member(relids, r)) > 0) + { + RelOptInfo *inner = root->simple_rel_array[r]; + + k = r; + + while ((k = bms_next_member(relids, k)) > 0) + { + Relids joinrelids = NULL; + RelOptInfo *outer = root->simple_rel_array[k]; + List *restrictlist; + List *selfjoinquals; + List *otherjoinquals; + ListCell *lc; + bool jinfo_check = true; + PlanRowMark *omark = NULL; + PlanRowMark *imark = NULL; + List *uclauses = NIL; + + /* A sanity check: the relations have the same Oid. */ + Assert(root->simple_rte_array[k]->relid == + root->simple_rte_array[r]->relid); + + /* + * It is impossible to eliminate join of two relations if they + * belong to different rules of order. Otherwise planner can't be + * able to find any variants of correct query plan. + */ + foreach(lc, root->join_info_list) + { + SpecialJoinInfo *info = (SpecialJoinInfo *) lfirst(lc); + + if ((bms_is_member(k, info->syn_lefthand) ^ + bms_is_member(r, info->syn_lefthand)) || + (bms_is_member(k, info->syn_righthand) ^ + bms_is_member(r, info->syn_righthand))) + { + jinfo_check = false; + break; + } + } + if (!jinfo_check) + continue; + + /* + * Check Row Marks equivalence. We can't remove the join if the + * relations have row marks of different strength (e.g. one is + * locked FOR UPDATE and another just has ROW_MARK_REFERENCE for + * EvalPlanQual rechecking). + */ + foreach(lc, root->rowMarks) + { + PlanRowMark *rowMark = (PlanRowMark *) lfirst(lc); + + if (rowMark->rti == k) + { + Assert(imark == NULL); + imark = rowMark; + } + else if (rowMark->rti == r) + { + Assert(omark == NULL); + omark = rowMark; + } + + if (omark && imark) + break; + } + if (omark && imark && omark->markType != imark->markType) + continue; + + /* + * We only deal with base rels here, so their relids bitset + * contains only one member -- their relid. + */ + joinrelids = bms_add_member(joinrelids, r); + joinrelids = bms_add_member(joinrelids, k); + + /* + * PHVs should not impose any constraints on removing self joins. + */ + + /* + * At this stage, joininfo lists of inner and outer can contain + * only clauses, required for a superior outer join that can't + * influence this optimization. So, we can avoid to call the + * build_joinrel_restrictlist() routine. + */ + restrictlist = generate_join_implied_equalities(root, joinrelids, + inner->relids, + outer, NULL); + if (restrictlist == NIL) + continue; + + /* + * Process restrictlist to separate the self join quals out of the + * other quals. e.g x = x goes to selfjoinquals and a = b to + * otherjoinquals. + */ + split_selfjoin_quals(root, restrictlist, &selfjoinquals, + &otherjoinquals, inner->relid, outer->relid); + + Assert(list_length(restrictlist) == + (list_length(selfjoinquals) + list_length(otherjoinquals))); + + /* + * To enable SJE for the only degenerate case without any self + * join clauses at all, add baserestrictinfo to this list. The + * degenerate case works only if both sides have the same clause. + * So doesn't matter which side to add. + */ + selfjoinquals = list_concat(selfjoinquals, outer->baserestrictinfo); + + /* + * Determine if the inner table can duplicate outer rows. We must + * bypass the unique rel cache here since we're possibly using a + * subset of join quals. We can use 'force_cache' == true when all + * join quals are self-join quals. Otherwise, we could end up + * putting false negatives in the cache. + */ + if (!innerrel_is_unique_ext(root, joinrelids, inner->relids, + outer, JOIN_INNER, selfjoinquals, + list_length(otherjoinquals) == 0, + &uclauses)) + continue; + + /* + * uclauses is the copy of outer->baserestrictinfo that associated with a index. + * We already proven that based on {outer->baserestrictinfo, selfjoinquals} + * comparsion with unique index we can guarantees that for the outer rel there is + * at most one row where columns equal given values. Sometimes that is not ok. e.g. + * "where s1.b = s2.b and s1.a = 1 and s2.a = 2". (the index is based on column(a,b). + * in these scarenio, we also need to validate equality of outer->baserestrictinfo + * with inner->outer->baserestrictinfo, + * or else we won't match the same row on each side of the join. + */ + if (!match_unique_clauses(root, inner, uclauses, outer->relid)) + continue; + + /* + * We can remove either relation, so remove the inner one in order + * to simplify this loop. + */ + remove_self_join_rel(root, omark, imark, outer, inner, restrictlist); + + result = bms_add_member(result, r); + + /* We have removed the outer relation, try the next one. */ + break; + } + } + + return result; +} + +/* + * Gather indexes of base relations from the joinlist and try to eliminate self + * joins. + */ +static Relids +remove_self_joins_recurse(PlannerInfo *root, List *joinlist, Relids toRemove) +{ + ListCell *jl; + Relids relids = NULL; + SelfJoinCandidate *candidates = NULL; + int i; + int j; + int numRels; + + /* Collect indexes of base relations of the join tree */ + foreach(jl, joinlist) + { + Node *jlnode = (Node *) lfirst(jl); + + if (IsA(jlnode, RangeTblRef)) + { + int varno = ((RangeTblRef *) jlnode)->rtindex; + RangeTblEntry *rte = root->simple_rte_array[varno]; + + /* + * We only consider ordinary relations as candidates to be removed, + * and these relations should not have TABLESAMPLE clauses + * specified. Removing a relation with TABLESAMPLE clause could + * potentially change the syntax of the query. + * Because of UPDATE/DELETE EPQ meachanism, currently Query->resultRelation + * or Query->mergeTargetRelation associated rel cannot be eliminated. + */ + if (rte->rtekind == RTE_RELATION && + rte->relkind == RELKIND_RELATION && + rte->tablesample == NULL && + varno != root->parse->resultRelation && + varno != root->parse->mergeTargetRelation) + { + Assert(!bms_is_member(varno, relids)); + relids = bms_add_member(relids, varno); + } + } + else if (IsA(jlnode, List)) + { + /* Recursively go inside the sub-joinlist */ + toRemove = remove_self_joins_recurse(root, (List *) jlnode, + toRemove); + } + else + elog(ERROR, "unrecognized joinlist node type: %d", + (int) nodeTag(jlnode)); + } + + numRels = bms_num_members(relids); + + /* Need at least two relations for the join */ + if (numRels < 2) + return toRemove; + + /* + * In order to find relations with the same oid we first build an array of + * candidates and then sort it by oid. + */ + candidates = (SelfJoinCandidate *) palloc(sizeof(SelfJoinCandidate) * + numRels); + i = -1; + j = 0; + while ((i = bms_next_member(relids, i)) >= 0) + { + candidates[j].relid = i; + candidates[j].reloid = root->simple_rte_array[i]->relid; + j++; + } + + qsort(candidates, numRels, sizeof(SelfJoinCandidate), + self_join_candidates_cmp); + + /* + * Iteratively form a group of relation indexes with the same oid and + * launch the routine that detects self-joins in this group and removes + * excessive range table entries. + * + * At the end of the iteration, exclude the group from the overall relids + * list. So each next iteration of the cycle will involve less and less + * value of relids. + */ + i = 0; + for (j = 1; j < numRels + 1; j++) + { + if (j == numRels || candidates[j].reloid != candidates[i].reloid) + { + if (j - i >= 2) + { + /* Create a group of relation indexes with the same oid */ + Relids group = NULL; + Relids removed; + + while (i < j) + { + group = bms_add_member(group, candidates[i].relid); + i++; + } + relids = bms_del_members(relids, group); + + /* + * Try to remove self-joins from a group of identical entries. + * Make the next attempt iteratively - if something is deleted + * from a group, changes in clauses and equivalence classes + * can give us a chance to find more candidates. + */ + do + { + Assert(!bms_overlap(group, toRemove)); + removed = remove_self_joins_one_group(root, group); + toRemove = bms_add_members(toRemove, removed); + group = bms_del_members(group, removed); + } while (!bms_is_empty(removed) && + bms_membership(group) == BMS_MULTIPLE); + bms_free(removed); + bms_free(group); + } + else + { + /* Single relation, just remove it from the set */ + relids = bms_del_member(relids, candidates[i].relid); + i = j; + } + } + } + + Assert(bms_is_empty(relids)); + + return toRemove; +} + +/* + * Compare self-join candidates by their oids. + */ +static int +self_join_candidates_cmp(const void *a, const void *b) +{ + const SelfJoinCandidate *ca = (const SelfJoinCandidate *) a; + const SelfJoinCandidate *cb = (const SelfJoinCandidate *) b; + + if (ca->reloid != cb->reloid) + return (ca->reloid < cb->reloid ? -1 : 1); + else + return 0; +} + +/* + * Find and remove useless self joins. + * + * Search for joins where a relation is joined to itself. If the join clause + * for each tuple from one side of the join is proven to match the same + * physical row (or nothing) on the other side, that self-join can be + * eliminated from the query. Suitable join clauses are assumed to be in the + * form of X = X, and can be replaced with NOT NULL clauses. + * + * For the sake of simplicity, we don't apply this optimization to special + * joins. Here is a list of what we could do in some particular cases: + * 'a a1 semi join a a2': is reduced to inner by reduce_unique_semijoins, + * and then removed normally. + * 'a a1 anti join a a2': could simplify to a scan with 'outer quals AND + * (IS NULL on join columns OR NOT inner quals)'. + * 'a a1 left join a a2': could simplify to a scan like inner but without + * NOT NULL conditions on join columns. + * 'a a1 left join (a a2 join b)': can't simplify this, because join to b + * can both remove rows and introduce duplicates. + * + * To search for removable joins, we order all the relations on their Oid, + * go over each set with the same Oid, and consider each pair of relations + * in this set. + * + * To remove the join, we mark one of the participating relations as dead + * and rewrite all references to it to point to the remaining relation. + * This includes modifying RestrictInfos, EquivalenceClasses, and + * EquivalenceMembers. We also have to modify the row marks. The join clauses + * of the removed relation become either restriction or join clauses, based on + * whether they reference any relations not participating in the removed join. + * + * 'joinlist' is the top-level joinlist of the query. If it has any + * references to the removed relations, we update them to point to the + * remaining ones. + */ +List * +remove_useless_self_joins(PlannerInfo *root, List *joinlist) +{ + Relids toRemove = NULL; + int relid = -1; + + if (!enable_self_join_removal || joinlist == NIL || + (list_length(joinlist) == 1 && !IsA(linitial(joinlist), List))) + return joinlist; + + /* + * Merge pairs of relations participated in self-join. Remove unnecessary + * range table entries. + */ + toRemove = remove_self_joins_recurse(root, joinlist, toRemove); + + if (unlikely(toRemove != NULL)) + { + /* At the end, remove orphaned relation links */ + while ((relid = bms_next_member(toRemove, relid)) >= 0) + { + int nremoved = 0; + + joinlist = remove_rel_from_joinlist(joinlist, relid, &nremoved); + if (nremoved != 1) + elog(ERROR, "failed to find relation %d in joinlist", relid); + } + } + + return joinlist; } diff --git a/src/backend/optimizer/plan/planmain.c b/src/backend/optimizer/plan/planmain.c index e17d31a5..075d36c7 100644 --- a/src/backend/optimizer/plan/planmain.c +++ b/src/backend/optimizer/plan/planmain.c @@ -230,6 +230,11 @@ query_planner(PlannerInfo *root, */ reduce_unique_semijoins(root); + /* + * Remove self joins on a unique column. + */ + joinlist = remove_useless_self_joins(root, joinlist); + /* * Now distribute "placeholders" to base rels as needed. This has to be * done after join removal because removal could change whether a diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c index 1c69c6e9..c5a8d00b 100644 --- a/src/backend/optimizer/prep/prepunion.c +++ b/src/backend/optimizer/prep/prepunion.c @@ -662,7 +662,7 @@ build_setop_child_paths(PlannerInfo *root, RelOptInfo *rel, * otherwise do statistical estimation. * * XXX you don't really want to know about this: we do the estimation - * using the subquery's original targetlist expressions, not the + * using the subroot->parse's original targetlist expressions, not the * subroot->processed_tlist which might seem more appropriate. The reason * is that if the subquery is itself a setop, it may return a * processed_tlist containing "varno 0" Vars generated by @@ -670,6 +670,9 @@ build_setop_child_paths(PlannerInfo *root, RelOptInfo *rel, * mightily. We ought to get rid of the "varno 0" hack, but that requires * a redesign of the parsetree representation of setops, so that there can * be an RTE corresponding to each setop's output. + * Note, we use this not subquery's targetlist but subroot->parse's + * targetlist, because it was revised by self-join removal. subquery's + * targetlist might contain the references to the removed relids. */ if (pNumGroups) { @@ -682,7 +685,7 @@ build_setop_child_paths(PlannerInfo *root, RelOptInfo *rel, *pNumGroups = rel->cheapest_total_path->rows; else *pNumGroups = estimate_num_groups(subroot, - get_tlist_exprs(subquery->targetList, false), + get_tlist_exprs(subroot->parse->targetList, false), rel->cheapest_total_path->rows, NULL, NULL); diff --git a/src/backend/rewrite/rewriteManip.c b/src/backend/rewrite/rewriteManip.c index 191f2dc0..da69ac5a 100644 --- a/src/backend/rewrite/rewriteManip.c +++ b/src/backend/rewrite/rewriteManip.c @@ -540,6 +540,33 @@ offset_relid_set(Relids relids, int offset) return result; } +/* + * Substitute newId by oldId in relids. + * + * We must make a copy of the original Bitmapset before making any + * modifications, because the same pointer to it might be shared among + * different places. + * Also, this function can be used in 'delete only' mode (newId < 0). + * It allows us to utilise the same code in the remove_useless_joins and the + * remove_self_joins features. + */ +Bitmapset * +replace_relid(Relids relids, int oldId, int newId) +{ + if (oldId < 0) + return relids; + + /* Delete relid without substitution. */ + if (newId < 0) + return bms_del_member(bms_copy(relids), oldId); + + /* Substitute newId for oldId. */ + if (bms_is_member(oldId, relids)) + return bms_add_member(bms_del_member(bms_copy(relids), oldId), newId); + + return relids; +} + /* * ChangeVarNodes - adjust Var nodes for a specific change of RT index * @@ -548,6 +575,7 @@ offset_relid_set(Relids relids, int offset) * to 'new_index'. The varnosyn fields are changed too. Also, adjust other * nodes that contain rangetable indexes, such as RangeTblRef and JoinExpr. * + * change_RangeTblRef: do we change RangeTblRef or not. see ChangeVarNodesExtended. * NOTE: although this has the form of a walker, we cheat and modify the * nodes in-place. The given expression tree should have been copied * earlier to ensure that no unwanted side-effects occur! @@ -558,6 +586,7 @@ typedef struct int rt_index; int new_index; int sublevels_up; + bool change_RangeTblRef; } ChangeVarNodes_context; static bool @@ -590,7 +619,7 @@ ChangeVarNodes_walker(Node *node, ChangeVarNodes_context *context) cexpr->cvarno = context->new_index; return false; } - if (IsA(node, RangeTblRef)) + if (IsA(node, RangeTblRef) && context->change_RangeTblRef) { RangeTblRef *rtr = (RangeTblRef *) node; @@ -637,6 +666,75 @@ ChangeVarNodes_walker(Node *node, ChangeVarNodes_context *context) } return false; } + if (IsA(node, RestrictInfo)) + { + RestrictInfo *rinfo = (RestrictInfo *) node; + int relid = -1; + bool is_req_equal = + (rinfo->required_relids == rinfo->clause_relids); + bool clause_relids_is_multiple = + (bms_membership(rinfo->clause_relids) == BMS_MULTIPLE); + + if (bms_is_member(context->rt_index, rinfo->clause_relids)) + { + expression_tree_walker((Node *) rinfo->clause, ChangeVarNodes_walker, (void *) context); + expression_tree_walker((Node *) rinfo->orclause, ChangeVarNodes_walker, (void *) context); + + rinfo->clause_relids = + replace_relid(rinfo->clause_relids, context->rt_index, context->new_index); + rinfo->num_base_rels = bms_num_members(rinfo->clause_relids); + rinfo->left_relids = + replace_relid(rinfo->left_relids, context->rt_index, context->new_index); + rinfo->right_relids = + replace_relid(rinfo->right_relids, context->rt_index, context->new_index); + } + + if (is_req_equal) + rinfo->required_relids = rinfo->clause_relids; + else + rinfo->required_relids = + replace_relid(rinfo->required_relids, context->rt_index, context->new_index); + + rinfo->outer_relids = + replace_relid(rinfo->outer_relids, context->rt_index, context->new_index); + rinfo->incompatible_relids = + replace_relid(rinfo->incompatible_relids, context->rt_index, context->new_index); + + if (rinfo->mergeopfamilies && + bms_get_singleton_member(rinfo->clause_relids, &relid) && + clause_relids_is_multiple && + relid == context->new_index && IsA(rinfo->clause, OpExpr)) + { + Expr *leftOp; + Expr *rightOp; + + leftOp = (Expr *) get_leftop(rinfo->clause); + rightOp = (Expr *) get_rightop(rinfo->clause); + + /* + * for SJE (self join elimination), changing varnos will make t1.a = t2.a + * to "t1.a = t1.a", which is always true, we can optizmied it out. + * to optimzied it out, we use equal to validate, + * but we also need to add a not null qual (NullTest). + * + */ + if (leftOp != NULL && equal(leftOp, rightOp)) + { + NullTest *ntest = makeNode(NullTest); + + ntest->arg = leftOp; + ntest->nulltesttype = IS_NOT_NULL; + ntest->argisrow = false; + ntest->location = -1; + rinfo->clause = (Expr *) ntest; + rinfo->mergeopfamilies = NIL; + rinfo->left_em = NULL; + rinfo->right_em = NULL; + } + Assert(rinfo->orclause == NULL); + } + return false; + } if (IsA(node, AppendRelInfo)) { AppendRelInfo *appinfo = (AppendRelInfo *) node; @@ -670,6 +768,55 @@ ChangeVarNodes_walker(Node *node, ChangeVarNodes_context *context) (void *) context); } +/* + * almost equivalent to ChangeVarNodes. also see comments in ChangeVarNodes. + * difference with ChangeVarNodes: + * when we use ChangeVarNodes for node we change all of it's underlying nodes. + * but for SJE we don't want to change node type: RangeTblRef, in some occasion. + * because SJE last step, remove_rel_from_joinlist is to + * remove left node (RangeTblRef) one by one. if in any case while using + * ChangeVarNodes, by accidently leaf node RangeTblRef also being updated + * then remove_rel_from_joinlist will have error. +*/ +void +ChangeVarNodesExtended(Node *node, int rt_index, int new_index, int sublevels_up, bool change_RangeTblRef) +{ + ChangeVarNodes_context context; + context.rt_index = rt_index; + context.new_index = new_index; + context.sublevels_up = sublevels_up; + context.change_RangeTblRef = change_RangeTblRef; + + if (node && IsA(node, Query)) + { + Query *qry = (Query *) node; + if (sublevels_up == 0) + { + ListCell *l; + + if (qry->resultRelation == rt_index) + qry->resultRelation = new_index; + + if (qry->mergeTargetRelation == rt_index) + qry->mergeTargetRelation = new_index; + + if (qry->onConflict && qry->onConflict->exclRelIndex == rt_index) + qry->onConflict->exclRelIndex = new_index; + + foreach(l, qry->rowMarks) + { + RowMarkClause *rc = (RowMarkClause *) lfirst(l); + + if (rc->rti == rt_index) + rc->rti = new_index; + } + } + query_tree_walker(qry, ChangeVarNodes_walker, + (void *) &context, 0); + } + else + ChangeVarNodes_walker(node, &context); +} void ChangeVarNodes(Node *node, int rt_index, int new_index, int sublevels_up) { @@ -678,6 +825,7 @@ ChangeVarNodes(Node *node, int rt_index, int new_index, int sublevels_up) context.rt_index = rt_index; context.new_index = new_index; context.sublevels_up = sublevels_up; + context.change_RangeTblRef = true; /* * Must be prepared to start with a Query or a bare expression tree; if diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c index 630ed0f1..40db9d25 100644 --- a/src/backend/utils/misc/guc_tables.c +++ b/src/backend/utils/misc/guc_tables.c @@ -988,6 +988,16 @@ struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, + { + {"enable_self_join_removal", PGC_USERSET, QUERY_TUNING_METHOD, + gettext_noop("Enable removal of unique self-joins."), + NULL, + GUC_EXPLAIN | GUC_NOT_IN_SAMPLE + }, + &enable_self_join_removal, + true, + NULL, NULL, NULL + }, { {"enable_group_by_reordering", PGC_USERSET, QUERY_TUNING_METHOD, gettext_noop("Enables reordering of GROUP BY keys."), diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index 14ccfc1a..56b34ae3 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -728,7 +728,7 @@ typedef struct PartitionSchemeData *PartitionScheme; * populate these fields, for base rels; but someday they might be used for * join rels too: * - * unique_for_rels - list of Relid sets, each one being a set of other + * unique_for_rels - list of UniqueRelInfo, each one being a set of other * rels for which this one has been proven unique * non_unique_for_rels - list of Relid sets, each one being a set of * other rels for which we have tried and failed to prove @@ -967,7 +967,7 @@ typedef struct RelOptInfo /* * cache space for remembering if we have proven this relation unique */ - /* known unique for these other relid set(s) */ + /* known unique for these other relid set(s) given in UniqueRelInfo(s) */ List *unique_for_rels; /* known not unique for these set(s) */ List *non_unique_for_rels; @@ -3432,4 +3432,35 @@ typedef struct AggTransInfo bool initValueIsNull; } AggTransInfo; +/* + * UniqueRelInfo caches a fact that a relation is unique when being joined + * to other relation(s). + */ +typedef struct UniqueRelInfo +{ + pg_node_attr(no_copy_equal, no_read, no_query_jumble) + + NodeTag type; + + /* + * The relation in consideration is unique when being joined with this set + * of other relation(s). + */ + Relids outerrelids; + + /* + * The relation in consideration is unique when considering only clauses + * suitable for self-join (passed split_selfjoin_quals()). + */ + bool self_join; + + /* + * Additional clauses from a baserestrictinfo list that were used to prove + * the uniqueness. We cache it for the self-join checking procedure: a + * self-join can be removed if the outer relation contains strictly the + * same set of clauses. + */ + List *extra_clauses; +} UniqueRelInfo; + #endif /* PATHNODES_H */ diff --git a/src/include/optimizer/optimizer.h b/src/include/optimizer/optimizer.h index 7b63c5cf..4381124f 100644 --- a/src/include/optimizer/optimizer.h +++ b/src/include/optimizer/optimizer.h @@ -191,6 +191,8 @@ extern SortGroupClause *get_sortgroupref_clause_noerr(Index sortref, * output list */ #define PVC_RECURSE_PLACEHOLDERS 0x0020 /* recurse into PlaceHolderVar * arguments */ +#define PVC_INCLUDE_CONVERTROWTYPES 0x0040 /* include ConvertRowtypeExprs in + * output list */ extern Bitmapset *pull_varnos(PlannerInfo *root, Node *node); extern Bitmapset *pull_varnos_of_level(PlannerInfo *root, Node *node, int levelsup); diff --git a/src/include/optimizer/paths.h b/src/include/optimizer/paths.h index 5e88c022..fff4b69d 100644 --- a/src/include/optimizer/paths.h +++ b/src/include/optimizer/paths.h @@ -72,6 +72,9 @@ extern void create_index_paths(PlannerInfo *root, RelOptInfo *rel); extern bool relation_has_unique_index_for(PlannerInfo *root, RelOptInfo *rel, List *restrictlist, List *exprlist, List *oprlist); +extern bool relation_has_unique_index_ext(PlannerInfo *root, RelOptInfo *rel, + List *restrictlist, List *exprlist, + List *oprlist, List **extra_clauses); extern bool indexcol_is_bool_constant_for_query(PlannerInfo *root, IndexOptInfo *index, int indexcol); diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h index aafc1737..f2e3fa4c 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -20,6 +20,7 @@ /* GUC parameters */ #define DEFAULT_CURSOR_TUPLE_FRACTION 0.1 extern PGDLLIMPORT double cursor_tuple_fraction; +extern PGDLLIMPORT bool enable_self_join_removal; /* query_planner callback to compute query_pathkeys */ typedef void (*query_pathkeys_callback) (PlannerInfo *root, void *extra); @@ -108,6 +109,11 @@ extern bool query_is_distinct_for(Query *query, List *colnos, List *opids); extern bool innerrel_is_unique(PlannerInfo *root, Relids joinrelids, Relids outerrelids, RelOptInfo *innerrel, JoinType jointype, List *restrictlist, bool force_cache); +extern bool innerrel_is_unique_ext(PlannerInfo *root, Relids joinrelids, + Relids outerrelids, RelOptInfo *innerrel, + JoinType jointype, List *restrictlist, + bool force_cache, List **uclauses); +extern List *remove_useless_self_joins(PlannerInfo *root, List *jointree); /* * prototypes for plan/setrefs.c diff --git a/src/include/rewrite/rewriteManip.h b/src/include/rewrite/rewriteManip.h index ac6d2049..074847f1 100644 --- a/src/include/rewrite/rewriteManip.h +++ b/src/include/rewrite/rewriteManip.h @@ -15,6 +15,7 @@ #define REWRITEMANIP_H #include "nodes/parsenodes.h" +#include "nodes/pathnodes.h" struct AttrMap; /* avoid including attmap.h here */ @@ -41,11 +42,14 @@ typedef enum ReplaceVarsNoMatchOption } ReplaceVarsNoMatchOption; +extern Bitmapset *replace_relid(Relids relids, int oldId, int newId); extern void CombineRangeTables(List **dst_rtable, List **dst_perminfos, List *src_rtable, List *src_perminfos); extern void OffsetVarNodes(Node *node, int offset, int sublevels_up); extern void ChangeVarNodes(Node *node, int rt_index, int new_index, int sublevels_up); +void ChangeVarNodesExtended(Node *node, int rt_index, int new_index, + int sublevels_up, bool change_RangeTblRef); extern void IncrementVarSublevelsUp(Node *node, int delta_sublevels_up, int min_sublevels_up); extern void IncrementVarSublevelsUp_rtable(List *rtable, diff --git a/src/test/regress/expected/equivclass.out b/src/test/regress/expected/equivclass.out index 126f7047..3d5de283 100644 --- a/src/test/regress/expected/equivclass.out +++ b/src/test/regress/expected/equivclass.out @@ -430,6 +430,36 @@ explain (costs off) Filter: ((unique1 IS NOT NULL) AND (unique2 IS NOT NULL)) (2 rows) +-- Test that broken ECs are processed correctly during self join removal. +-- Disable merge joins so that we don't get an error about missing commutator. +-- Test both orientations of the join clause, because only one of them breaks +-- the EC. +set enable_mergejoin to off; +explain (costs off) + select * from ec0 m join ec0 n on m.ff = n.ff + join ec1 p on m.ff + n.ff = p.f1; + QUERY PLAN +--------------------------------------- + Nested Loop + Join Filter: ((n.ff + n.ff) = p.f1) + -> Seq Scan on ec0 n + -> Materialize + -> Seq Scan on ec1 p +(5 rows) + +explain (costs off) + select * from ec0 m join ec0 n on m.ff = n.ff + join ec1 p on p.f1::int8 = (m.ff + n.ff)::int8alias1; + QUERY PLAN +--------------------------------------------------------------- + Nested Loop + Join Filter: ((p.f1)::bigint = ((n.ff + n.ff))::int8alias1) + -> Seq Scan on ec0 n + -> Materialize + -> Seq Scan on ec1 p +(5 rows) + +reset enable_mergejoin; -- this could be converted, but isn't at present explain (costs off) select * from tenk1 where unique1 = unique1 or unique2 = unique2; diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 53f70d72..4e4cec63 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -6213,6 +6213,1070 @@ select * from ----+----+----+---- (0 rows) +-- +-- test that semi- or inner self-joins on a unique column are removed +-- +-- enable only nestloop to get more predictable plans +set enable_hashjoin to off; +set enable_mergejoin to off; +create table sj (a int unique, b int, c int unique); +insert into sj values (1, null, 2), (null, 2, null), (2, 1, 1); +analyze sj; +-- Trivial self-join case. +explain (costs off) +select p.* from sj p, sj q where q.a = p.a and q.b = q.a - 1; + QUERY PLAN +----------------------------------------------- + Seq Scan on sj q + Filter: ((a IS NOT NULL) AND (b = (a - 1))) +(2 rows) + +select p.* from sj p, sj q where q.a = p.a and q.b = q.a - 1; + a | b | c +---+---+--- + 2 | 1 | 1 +(1 row) + +-- Self-join removal performs after a subquery pull-up process and could remove +-- such kind of self-join too. Check this option. +explain (costs off) +select * from sj p +where exists (select * from sj q + where q.a = p.a and q.b < 10); + QUERY PLAN +------------------------------------------ + Seq Scan on sj q + Filter: ((a IS NOT NULL) AND (b < 10)) +(2 rows) + +select * from sj p +where exists (select * from sj q + where q.a = p.a and q.b < 10); + a | b | c +---+---+--- + 2 | 1 | 1 +(1 row) + +-- Don't remove self-join for the case of equality of two different unique columns. +explain (costs off) +select * from sj t1, sj t2 where t1.a = t2.c and t1.b is not null; + QUERY PLAN +--------------------------------------- + Nested Loop + Join Filter: (t1.a = t2.c) + -> Seq Scan on sj t2 + -> Materialize + -> Seq Scan on sj t1 + Filter: (b IS NOT NULL) +(6 rows) + +-- Ensure that relations with TABLESAMPLE clauses are not considered as +-- candidates to be removed +explain (costs off) +select * from sj t1 + join lateral + (select * from sj tablesample system(t1.b)) s + on t1.a = s.a; + QUERY PLAN +--------------------------------------- + Nested Loop + -> Seq Scan on sj t1 + -> Memoize + Cache Key: t1.a, t1.b + Cache Mode: binary + -> Sample Scan on sj + Sampling: system (t1.b) + Filter: (t1.a = a) +(8 rows) + +-- Ensure that SJE does not form a self-referential lateral dependency +explain (costs off) +select * from sj t1 + left join lateral + (select t1.a as t1a, * from sj t2) s + on true +where t1.a = s.a; + QUERY PLAN +--------------------------- + Seq Scan on sj t2 + Filter: (a IS NOT NULL) +(2 rows) + +-- Degenerated case. +explain (costs off) +select * from + (select a as x from sj where false) as q1, + (select a as y from sj where false) as q2 +where q1.x = q2.y; + QUERY PLAN +-------------------------- + Result + One-Time Filter: false +(2 rows) + +-- We can't use a cross-EC generated self join qual because of current logic of +-- the generate_join_implied_equalities routine. +explain (costs off) +select * from sj t1, sj t2 where t1.a = t1.b and t1.b = t2.b and t2.b = t2.a; + QUERY PLAN +------------------------------ + Nested Loop + Join Filter: (t1.a = t2.b) + -> Seq Scan on sj t1 + Filter: (a = b) + -> Seq Scan on sj t2 + Filter: (b = a) +(6 rows) + +explain (costs off) +select * from sj t1, sj t2, sj t3 +where t1.a = t1.b and t1.b = t2.b and t2.b = t2.a and + t1.b = t3.b and t3.b = t3.a; + QUERY PLAN +------------------------------------ + Nested Loop + Join Filter: (t1.a = t3.b) + -> Nested Loop + Join Filter: (t1.a = t2.b) + -> Seq Scan on sj t1 + Filter: (a = b) + -> Seq Scan on sj t2 + Filter: (b = a) + -> Seq Scan on sj t3 + Filter: (b = a) +(10 rows) + +-- Double self-join removal. +-- Use a condition on "b + 1", not on "b", for the second join, so that +-- the equivalence class is different from the first one, and we can +-- test the non-ec code path. +explain (costs off) +select * +from sj t1 + join sj t2 on t1.a = t2.a and t1.b = t2.b + join sj t3 on t2.a = t3.a and t2.b + 1 = t3.b + 1; + QUERY PLAN +--------------------------------------------------------------------------- + Seq Scan on sj t3 + Filter: ((a IS NOT NULL) AND (b IS NOT NULL) AND ((b + 1) IS NOT NULL)) +(2 rows) + +-- subselect that references the removed relation +explain (costs off) +select t1.a, (select a from sj where a = t2.a and a = t1.a) +from sj t1, sj t2 +where t1.a = t2.a; + QUERY PLAN +------------------------------------------ + Seq Scan on sj t2 + Filter: (a IS NOT NULL) + SubPlan 1 + -> Result + One-Time Filter: (t2.a = t2.a) + -> Seq Scan on sj + Filter: (a = t2.a) +(7 rows) + +-- self-join under outer join +explain (costs off) +select * from sj x join sj y on x.a = y.a +left join int8_tbl z on x.a = z.q1; + QUERY PLAN +------------------------------------ + Nested Loop Left Join + Join Filter: (y.a = z.q1) + -> Seq Scan on sj y + Filter: (a IS NOT NULL) + -> Materialize + -> Seq Scan on int8_tbl z +(6 rows) + +explain (costs off) +select * from sj x join sj y on x.a = y.a +left join int8_tbl z on y.a = z.q1; + QUERY PLAN +------------------------------------ + Nested Loop Left Join + Join Filter: (y.a = z.q1) + -> Seq Scan on sj y + Filter: (a IS NOT NULL) + -> Materialize + -> Seq Scan on int8_tbl z +(6 rows) + +explain (costs off) +select * from ( + select t1.*, t2.a as ax from sj t1 join sj t2 + on (t1.a = t2.a and t1.c * t1.c = t2.c + 2 and t2.b is null) +) as q1 +left join + (select t3.* from sj t3, sj t4 where t3.c = t4.c) as q2 +on q1.ax = q2.a; + QUERY PLAN +--------------------------------------------------------------------------- + Nested Loop Left Join + Join Filter: (t2.a = t4.a) + -> Seq Scan on sj t2 + Filter: ((b IS NULL) AND (a IS NOT NULL) AND ((c * c) = (c + 2))) + -> Seq Scan on sj t4 + Filter: (c IS NOT NULL) +(6 rows) + +-- Test that placeholders are updated correctly after join removal +explain (costs off) +select * from (values (1)) x +left join (select coalesce(y.q1, 1) from int8_tbl y + right join sj j1 inner join sj j2 on j1.a = j2.a + on true) z +on true; + QUERY PLAN +------------------------------------------ + Nested Loop Left Join + -> Result + -> Nested Loop Left Join + -> Seq Scan on sj j2 + Filter: (a IS NOT NULL) + -> Materialize + -> Seq Scan on int8_tbl y +(7 rows) + +-- Test that references to the removed rel in lateral subqueries are replaced +-- correctly after join removal +explain (verbose, costs off) +select t3.a from sj t1 + join sj t2 on t1.a = t2.a + join lateral (select t1.a offset 0) t3 on true; + QUERY PLAN +------------------------------------ + Nested Loop + Output: (t2.a) + -> Seq Scan on public.sj t2 + Output: t2.a, t2.b, t2.c + Filter: (t2.a IS NOT NULL) + -> Result + Output: t2.a +(7 rows) + +explain (verbose, costs off) +select t3.a from sj t1 + join sj t2 on t1.a = t2.a + join lateral (select * from (select t1.a offset 0) offset 0) t3 on true; + QUERY PLAN +------------------------------------ + Nested Loop + Output: (t2.a) + -> Seq Scan on public.sj t2 + Output: t2.a, t2.b, t2.c + Filter: (t2.a IS NOT NULL) + -> Result + Output: t2.a +(7 rows) + +explain (verbose, costs off) +select t4.a from sj t1 + join sj t2 on t1.a = t2.a + join lateral (select t3.a from sj t3, (select t1.a) offset 0) t4 on true; + QUERY PLAN +------------------------------------ + Nested Loop + Output: t3.a + -> Seq Scan on public.sj t2 + Output: t2.a, t2.b, t2.c + Filter: (t2.a IS NOT NULL) + -> Seq Scan on public.sj t3 + Output: t3.a +(7 rows) + +-- Check updating of Lateral links from top-level query to the removing relation +explain (COSTS OFF) +SELECT * FROM pg_am am WHERE am.amname IN ( + SELECT c1.relname AS relname + FROM pg_class c1 + JOIN pg_class c2 + ON c1.oid=c2.oid AND c1.oid < 10 +); + QUERY PLAN +---------------------------------------------------------------- + Nested Loop Semi Join + Join Filter: (am.amname = c2.relname) + -> Seq Scan on pg_am am + -> Materialize + -> Index Scan using pg_class_oid_index on pg_class c2 + Index Cond: (oid < '10'::oid) +(6 rows) + +-- +-- SJE corner case: uniqueness of an inner is [partially] derived from +-- baserestrictinfo clauses. +-- XXX: We really should allow SJE for these corner cases? +-- +INSERT INTO sj VALUES (3, 1, 3); +-- Don't remove SJ +EXPLAIN (COSTS OFF) +SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 2 AND j2.a = 3; + QUERY PLAN +------------------------------ + Nested Loop + Join Filter: (j1.b = j2.b) + -> Seq Scan on sj j1 + Filter: (a = 2) + -> Seq Scan on sj j2 + Filter: (a = 3) +(6 rows) + +-- Return one row +SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 2 AND j2.a = 3; + a | b | c | a | b | c +---+---+---+---+---+--- + 2 | 1 | 1 | 3 | 1 | 3 +(1 row) + +-- Remove SJ, define uniqueness by a constant +EXPLAIN (COSTS OFF) +SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 2 AND j2.a = 2; + QUERY PLAN +----------------------------------------- + Seq Scan on sj j2 + Filter: ((b IS NOT NULL) AND (a = 2)) +(2 rows) + +-- Return one row +SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 2 AND j2.a = 2; + a | b | c | a | b | c +---+---+---+---+---+--- + 2 | 1 | 1 | 2 | 1 | 1 +(1 row) + +-- Remove SJ, define uniqueness by a constant expression +EXPLAIN (COSTS OFF) +SELECT * FROM sj j1, sj j2 +WHERE j1.b = j2.b + AND j1.a = (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int + AND (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int = j2.a; + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------- + Seq Scan on sj j2 + Filter: ((b IS NOT NULL) AND (a = (((EXTRACT(dow FROM CURRENT_TIMESTAMP(0)) / '15'::numeric) + '3'::numeric))::integer)) +(2 rows) + +-- Return one row +SELECT * FROM sj j1, sj j2 +WHERE j1.b = j2.b + AND j1.a = (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int + AND (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int = j2.a; + a | b | c | a | b | c +---+---+---+---+---+--- + 3 | 1 | 3 | 3 | 1 | 3 +(1 row) + +-- Remove SJ +EXPLAIN (COSTS OFF) +SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 1 AND j2.a = 1; + QUERY PLAN +----------------------------------------- + Seq Scan on sj j2 + Filter: ((b IS NOT NULL) AND (a = 1)) +(2 rows) + +-- Return no rows +SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 1 AND j2.a = 1; + a | b | c | a | b | c +---+---+---+---+---+--- +(0 rows) + +-- Shuffle a clause. Remove SJ +EXPLAIN (COSTS OFF) +SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND 1 = j1.a AND j2.a = 1; + QUERY PLAN +----------------------------------------- + Seq Scan on sj j2 + Filter: ((b IS NOT NULL) AND (a = 1)) +(2 rows) + +-- Return no rows +SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND 1 = j1.a AND j2.a = 1; + a | b | c | a | b | c +---+---+---+---+---+--- +(0 rows) + +-- SJE Corner case: a 'a.x=a.x' clause, have replaced with 'a.x IS NOT NULL' +-- after SJ elimination it shouldn't be a mergejoinable clause. +EXPLAIN (COSTS OFF) +SELECT t4.* +FROM (SELECT t1.*, t2.a AS a1 FROM sj t1, sj t2 WHERE t1.b = t2.b) AS t3 +JOIN sj t4 ON (t4.a = t3.a) WHERE t3.a1 = 42; + QUERY PLAN +--------------------------------- + Nested Loop + Join Filter: (t1.b = t2.b) + -> Seq Scan on sj t2 + Filter: (a = 42) + -> Seq Scan on sj t1 + Filter: (a IS NOT NULL) +(6 rows) + +SELECT t4.* +FROM (SELECT t1.*, t2.a AS a1 FROM sj t1, sj t2 WHERE t1.b = t2.b) AS t3 +JOIN sj t4 ON (t4.a = t3.a) WHERE t3.a1 = 42; + a | b | c +---+---+--- +(0 rows) + +-- Functional index +CREATE UNIQUE INDEX sj_fn_idx ON sj((a * a)); +-- Remove SJ +EXPLAIN (COSTS OFF) +SELECT * FROM sj j1, sj j2 + WHERE j1.b = j2.b AND j1.a*j1.a = 1 AND j2.a*j2.a = 1; + QUERY PLAN +----------------------------------------------- + Seq Scan on sj j2 + Filter: ((b IS NOT NULL) AND ((a * a) = 1)) +(2 rows) + +-- Don't remove SJ +EXPLAIN (COSTS OFF) +SELECT * FROM sj j1, sj j2 + WHERE j1.b = j2.b AND j1.a*j1.a = 1 AND j2.a*j2.a = 2; + QUERY PLAN +------------------------------- + Nested Loop + Join Filter: (j1.b = j2.b) + -> Seq Scan on sj j1 + Filter: ((a * a) = 1) + -> Seq Scan on sj j2 + Filter: ((a * a) = 2) +(6 rows) + +-- Restriction contains expressions in both sides, Remove SJ. +EXPLAIN (COSTS OFF) +SELECT * FROM sj j1, sj j2 +WHERE j1.b = j2.b + AND (j1.a*j1.a) = (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int + AND (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int = (j2.a*j2.a); + QUERY PLAN +---------------------------------------------------------------------------------------------------------------------------------- + Seq Scan on sj j2 + Filter: ((b IS NOT NULL) AND ((a * a) = (((EXTRACT(dow FROM CURRENT_TIMESTAMP(0)) / '15'::numeric) + '3'::numeric))::integer)) +(2 rows) + +-- Empty set of rows should be returned +SELECT * FROM sj j1, sj j2 +WHERE j1.b = j2.b + AND (j1.a*j1.a) = (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int + AND (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int = (j2.a*j2.a); + a | b | c | a | b | c +---+---+---+---+---+--- +(0 rows) + +-- Restriction contains volatile function - disable SJE feature. +EXPLAIN (COSTS OFF) +SELECT * FROM sj j1, sj j2 +WHERE j1.b = j2.b + AND (j1.a*j1.c/3) = (random()/3 + 3)::int + AND (random()/3 + 3)::int = (j2.a*j2.c/3); + QUERY PLAN +----------------------------------------------------------------------------------------------------------- + Nested Loop + Join Filter: (j1.b = j2.b) + -> Seq Scan on sj j1 + Filter: (((a * c) / 3) = (((random() / '3'::double precision) + '3'::double precision))::integer) + -> Seq Scan on sj j2 + Filter: ((((random() / '3'::double precision) + '3'::double precision))::integer = ((a * c) / 3)) +(6 rows) + +-- Return one row +SELECT * FROM sj j1, sj j2 +WHERE j1.b = j2.b + AND (j1.a*j1.c/3) = (random()/3 + 3)::int + AND (random()/3 + 3)::int = (j2.a*j2.c/3); + a | b | c | a | b | c +---+---+---+---+---+--- + 3 | 1 | 3 | 3 | 1 | 3 +(1 row) + +-- Multiple filters +CREATE UNIQUE INDEX sj_temp_idx1 ON sj(a,b,c); +-- Remove SJ +EXPLAIN (COSTS OFF) +SELECT * FROM sj j1, sj j2 + WHERE j1.b = j2.b AND j1.a = 2 AND j1.c = 3 AND j2.a = 2 AND 3 = j2.c; + QUERY PLAN +----------------------------------------------------- + Seq Scan on sj j2 + Filter: ((b IS NOT NULL) AND (a = 2) AND (c = 3)) +(2 rows) + +-- Don't remove SJ +EXPLAIN (COSTS OFF) + SELECT * FROM sj j1, sj j2 + WHERE j1.b = j2.b AND 2 = j1.a AND j1.c = 3 AND j2.a = 1 AND 3 = j2.c; + QUERY PLAN +--------------------------------------- + Nested Loop + Join Filter: (j1.b = j2.b) + -> Seq Scan on sj j1 + Filter: ((2 = a) AND (c = 3)) + -> Seq Scan on sj j2 + Filter: ((c = 3) AND (a = 1)) +(6 rows) + +CREATE UNIQUE INDEX sj_temp_idx ON sj(a,b); +-- Don't remove SJ +EXPLAIN (COSTS OFF) +SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 2; + QUERY PLAN +------------------------------ + Nested Loop + Join Filter: (j1.b = j2.b) + -> Seq Scan on sj j1 + Filter: (a = 2) + -> Seq Scan on sj j2 +(5 rows) + +-- Don't remove SJ +EXPLAIN (COSTS OFF) +SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND 2 = j2.a; + QUERY PLAN +------------------------------ + Nested Loop + Join Filter: (j1.b = j2.b) + -> Seq Scan on sj j2 + Filter: (2 = a) + -> Seq Scan on sj j1 +(5 rows) + +-- Don't remove SJ +EXPLAIN (COSTS OFF) +SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND (j1.a = 1 OR j2.a = 1); + QUERY PLAN +--------------------------------------------------------------- + Nested Loop + Join Filter: ((j1.b = j2.b) AND ((j1.a = 1) OR (j2.a = 1))) + -> Seq Scan on sj j1 + -> Materialize + -> Seq Scan on sj j2 +(5 rows) + +DROP INDEX sj_fn_idx, sj_temp_idx1, sj_temp_idx; +-- Test that OR predicated are updated correctly after join removal +CREATE TABLE tab_with_flag ( id INT PRIMARY KEY, is_flag SMALLINT); +CREATE INDEX idx_test_is_flag ON tab_with_flag (is_flag); +EXPLAIN (COSTS OFF) +SELECT COUNT(*) FROM tab_with_flag +WHERE + (is_flag IS NULL OR is_flag = 0) + AND id IN (SELECT id FROM tab_with_flag WHERE id IN (2, 3)); + QUERY PLAN +----------------------------------------------------------- + Aggregate + -> Bitmap Heap Scan on tab_with_flag + Recheck Cond: (id = ANY ('{2,3}'::integer[])) + Filter: ((is_flag IS NULL) OR (is_flag = 0)) + -> Bitmap Index Scan on tab_with_flag_pkey + Index Cond: (id = ANY ('{2,3}'::integer[])) +(6 rows) + +DROP TABLE tab_with_flag; +-- HAVING clause +explain (costs off) +select p.b from sj p join sj q on p.a = q.a group by p.b having sum(p.a) = 1; + QUERY PLAN +--------------------------------- + HashAggregate + Group Key: q.b + Filter: (sum(q.a) = 1) + -> Seq Scan on sj q + Filter: (a IS NOT NULL) +(5 rows) + +-- update lateral references and range table entry reference +explain (verbose, costs off) +select 1 from (select x.* from sj x, sj y where x.a = y.a) q, + lateral generate_series(1, q.a) gs(i); + QUERY PLAN +------------------------------------------------------ + Nested Loop + Output: 1 + -> Seq Scan on public.sj y + Output: y.a, y.b, y.c + Filter: (y.a IS NOT NULL) + -> Function Scan on pg_catalog.generate_series gs + Output: gs.i + Function Call: generate_series(1, y.a) +(8 rows) + +explain (verbose, costs off) +select 1 from (select y.* from sj x, sj y where x.a = y.a) q, + lateral generate_series(1, q.a) gs(i); + QUERY PLAN +------------------------------------------------------ + Nested Loop + Output: 1 + -> Seq Scan on public.sj y + Output: y.a, y.b, y.c + Filter: (y.a IS NOT NULL) + -> Function Scan on pg_catalog.generate_series gs + Output: gs.i + Function Call: generate_series(1, y.a) +(8 rows) + +-- Test that a non-EC-derived join clause is processed correctly. Use an +-- outer join so that we can't form an EC. +explain (costs off) select * from sj p join sj q on p.a = q.a + left join sj r on p.a + q.a = r.a; + QUERY PLAN +------------------------------------ + Nested Loop Left Join + Join Filter: ((q.a + q.a) = r.a) + -> Seq Scan on sj q + Filter: (a IS NOT NULL) + -> Materialize + -> Seq Scan on sj r +(6 rows) + +-- FIXME this constant false filter doesn't look good. Should we merge +-- equivalence classes? +explain (costs off) +select * from sj p, sj q where p.a = q.a and p.b = 1 and q.b = 2; + QUERY PLAN +----------------------------------------------------- + Seq Scan on sj q + Filter: ((a IS NOT NULL) AND (b = 2) AND (b = 1)) +(2 rows) + +-- Check that attr_needed is updated correctly after self-join removal. In this +-- test, the join of j1 with j2 is removed. k1.b is required at either j1 or j2. +-- If this info is lost, join targetlist for (k1, k2) will not contain k1.b. +-- Use index scan for k1 so that we don't get 'b' from physical tlist used for +-- seqscan. Also disable reordering of joins because this test depends on a +-- particular join tree. +create table sk (a int, b int); +create index on sk(a); +set join_collapse_limit to 1; +set enable_seqscan to off; +explain (costs off) select 1 from + (sk k1 join sk k2 on k1.a = k2.a) + join (sj j1 join sj j2 on j1.a = j2.a) on j1.b = k1.b; + QUERY PLAN +----------------------------------------------------- + Nested Loop + Join Filter: (k1.b = j2.b) + -> Nested Loop + -> Index Scan using sk_a_idx on sk k1 + -> Index Only Scan using sk_a_idx on sk k2 + Index Cond: (a = k1.a) + -> Materialize + -> Index Scan using sj_a_key on sj j2 + Index Cond: (a IS NOT NULL) +(9 rows) + +explain (costs off) select 1 from + (sk k1 join sk k2 on k1.a = k2.a) + join (sj j1 join sj j2 on j1.a = j2.a) on j2.b = k1.b; + QUERY PLAN +----------------------------------------------------- + Nested Loop + Join Filter: (k1.b = j2.b) + -> Nested Loop + -> Index Scan using sk_a_idx on sk k1 + -> Index Only Scan using sk_a_idx on sk k2 + Index Cond: (a = k1.a) + -> Materialize + -> Index Scan using sj_a_key on sj j2 + Index Cond: (a IS NOT NULL) +(9 rows) + +reset join_collapse_limit; +reset enable_seqscan; +-- Check that clauses from the join filter list is not lost on the self-join removal +CREATE TABLE emp1 (id SERIAL PRIMARY KEY NOT NULL, code int); +EXPLAIN (VERBOSE, COSTS OFF) +SELECT * FROM emp1 e1, emp1 e2 WHERE e1.id = e2.id AND e2.code <> e1.code; + QUERY PLAN +------------------------------------------ + Seq Scan on public.emp1 e2 + Output: e2.id, e2.code, e2.id, e2.code + Filter: (e2.code <> e2.code) +(3 rows) + +-- Shuffle self-joined relations. Only in the case of iterative deletion +-- attempts explains of these queries will be identical. +CREATE UNIQUE INDEX ON emp1((id*id)); +EXPLAIN (COSTS OFF) +SELECT count(*) FROM emp1 c1, emp1 c2, emp1 c3 +WHERE c1.id=c2.id AND c1.id*c2.id=c3.id*c3.id; + QUERY PLAN +----------------------------------------- + Aggregate + -> Seq Scan on emp1 c3 + Filter: ((id * id) IS NOT NULL) +(3 rows) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM emp1 c1, emp1 c2, emp1 c3 +WHERE c1.id=c3.id AND c1.id*c3.id=c2.id*c2.id; + QUERY PLAN +----------------------------------------- + Aggregate + -> Seq Scan on emp1 c3 + Filter: ((id * id) IS NOT NULL) +(3 rows) + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM emp1 c1, emp1 c2, emp1 c3 +WHERE c3.id=c2.id AND c3.id*c2.id=c1.id*c1.id; + QUERY PLAN +----------------------------------------- + Aggregate + -> Seq Scan on emp1 c3 + Filter: ((id * id) IS NOT NULL) +(3 rows) + +-- Check the usage of a parse tree by the set operations (bug #18170) +EXPLAIN (COSTS OFF) +SELECT c1.code FROM emp1 c1 LEFT JOIN emp1 c2 ON c1.id = c2.id +WHERE c2.id IS NOT NULL +EXCEPT ALL +SELECT c3.code FROM emp1 c3; + QUERY PLAN +------------------------------------------- + HashSetOp Except All + -> Append + -> Subquery Scan on "*SELECT* 1" + -> Seq Scan on emp1 c2 + -> Subquery Scan on "*SELECT* 2" + -> Seq Scan on emp1 c3 +(6 rows) + +-- Check that SJE removes references from PHVs correctly +explain (costs off) +select * from emp1 t1 left join + (select coalesce(t3.code, 1) from emp1 t2 + left join (emp1 t3 join emp1 t4 on t3.id = t4.id) + on true) +on true; + QUERY PLAN +--------------------------------------------- + Nested Loop Left Join + -> Seq Scan on emp1 t1 + -> Materialize + -> Nested Loop Left Join + -> Seq Scan on emp1 t2 + -> Materialize + -> Seq Scan on emp1 t4 +(7 rows) + +-- Check that SJE removes the whole PHVs correctly +explain (verbose, costs off) +select 1 from emp1 t1 left join + ((select 1 as x, * from emp1 t2) s1 inner join + (select * from emp1 t3) s2 on s1.id = s2.id) + on true +where s1.x = 1; + QUERY PLAN +---------------------------------------- + Nested Loop + Output: 1 + -> Seq Scan on public.emp1 t1 + Output: t1.id, t1.code + -> Materialize + Output: t3.id + -> Seq Scan on public.emp1 t3 + Output: t3.id + Filter: (1 = 1) +(9 rows) + +-- Check that PHVs do not impose any constraints on removing self joins +explain (verbose, costs off) +select * from emp1 t1 join emp1 t2 on t1.id = t2.id left join + lateral (select t1.id as t1id, * from generate_series(1,1) t3) s on true; + QUERY PLAN +---------------------------------------------------------- + Nested Loop Left Join + Output: t2.id, t2.code, t2.id, t2.code, (t2.id), t3.t3 + -> Seq Scan on public.emp1 t2 + Output: t2.id, t2.code + -> Function Scan on pg_catalog.generate_series t3 + Output: t3.t3, t2.id + Function Call: generate_series(1, 1) +(7 rows) + +explain (verbose, costs off) +select * from generate_series(1,10) t1(id) left join + lateral (select t1.id as t1id, t2.id from emp1 t2 join emp1 t3 on t2.id = t3.id) +on true; + QUERY PLAN +------------------------------------------------------ + Nested Loop Left Join + Output: t1.id, (t1.id), t3.id + -> Function Scan on pg_catalog.generate_series t1 + Output: t1.id + Function Call: generate_series(1, 10) + -> Seq Scan on public.emp1 t3 + Output: t3.id, t1.id +(7 rows) + +-- Check that SJE replaces join clauses involving the removed rel correctly +explain (costs off) +select * from emp1 t1 + inner join emp1 t2 on t1.id = t2.id + left join emp1 t3 on t1.id > 1 and t1.id < 2; + QUERY PLAN +---------------------------------------------- + Nested Loop Left Join + Join Filter: ((t2.id > 1) AND (t2.id < 2)) + -> Seq Scan on emp1 t2 + -> Materialize + -> Seq Scan on emp1 t3 +(5 rows) + +-- Check that SJE doesn't replace the target relation +EXPLAIN (COSTS OFF) +WITH t1 AS (SELECT * FROM emp1) +UPDATE emp1 SET code = t1.code + 1 FROM t1 +WHERE t1.id = emp1.id RETURNING emp1.id, emp1.code, t1.code; + QUERY PLAN +------------------------------------------------------- + Update on emp1 + -> Nested Loop + -> Seq Scan on emp1 + -> Index Scan using emp1_pkey on emp1 emp1_1 + Index Cond: (id = emp1.id) +(5 rows) + +INSERT INTO emp1 VALUES (1, 1), (2, 1); +WITH t1 AS (SELECT * FROM emp1) +UPDATE emp1 SET code = t1.code + 1 FROM t1 +WHERE t1.id = emp1.id RETURNING emp1.id, emp1.code, t1.code; + id | code | code +----+------+------ + 1 | 2 | 1 + 2 | 2 | 1 +(2 rows) + +TRUNCATE emp1; +EXPLAIN (COSTS OFF) +UPDATE sj sq SET b = 1 FROM sj as sz WHERE sq.a = sz.a; + QUERY PLAN +------------------------------------- + Update on sj sq + -> Nested Loop + Join Filter: (sq.a = sz.a) + -> Seq Scan on sj sq + -> Materialize + -> Seq Scan on sj sz +(6 rows) + +CREATE RULE sj_del_rule AS ON DELETE TO sj + DO INSTEAD + UPDATE sj SET a = 1 WHERE a = old.a; +EXPLAIN (COSTS OFF) DELETE FROM sj; + QUERY PLAN +-------------------------------------- + Update on sj sj_1 + -> Nested Loop + Join Filter: (sj.a = sj_1.a) + -> Seq Scan on sj sj_1 + -> Materialize + -> Seq Scan on sj +(6 rows) + +DROP RULE sj_del_rule ON sj CASCADE; +-- Check that SJE does not mistakenly omit qual clauses (bug #18187) +insert into emp1 values (1, 1); +explain (costs off) +select 1 from emp1 full join + (select * from emp1 t1 join + emp1 t2 join emp1 t3 on t2.id = t3.id + on true + where false) s on true +where false; + QUERY PLAN +-------------------------- + Result + One-Time Filter: false +(2 rows) + +select 1 from emp1 full join + (select * from emp1 t1 join + emp1 t2 join emp1 t3 on t2.id = t3.id + on true + where false) s on true +where false; + ?column? +---------- +(0 rows) + +-- Check that SJE does not mistakenly re-use knowledge of relation uniqueness +-- made with different set of quals +insert into emp1 values (2, 1); +explain (costs off) +select * from emp1 t1 where exists (select * from emp1 t2 + where t2.id = t1.code and t2.code > 0); + QUERY PLAN +--------------------------------------------- + Nested Loop + -> Seq Scan on emp1 t1 + -> Index Scan using emp1_pkey on emp1 t2 + Index Cond: (id = t1.code) + Filter: (code > 0) +(5 rows) + +select * from emp1 t1 where exists (select * from emp1 t2 + where t2.id = t1.code and t2.code > 0); + id | code +----+------ + 1 | 1 + 2 | 1 +(2 rows) + +-- We can remove the join even if we find the join can't duplicate rows and +-- the base quals of each side are different. In the following case we end up +-- moving quals over to s1 to make it so it can't match any rows. +create table sl(a int, b int, c int); +create unique index on sl(a, b); +vacuum analyze sl; +-- Both sides are unique, but base quals are different +explain (costs off) +select * from sl t1, sl t2 where t1.a = t2.a and t1.b = 1 and t2.b = 2; + QUERY PLAN +------------------------------ + Nested Loop + Join Filter: (t1.a = t2.a) + -> Seq Scan on sl t1 + Filter: (b = 1) + -> Seq Scan on sl t2 + Filter: (b = 2) +(6 rows) + +-- Check NullTest in baserestrictinfo list +explain (costs off) +select * from sl t1, sl t2 +where t1.a = t2.a and t1.b = 1 and t2.b = 2 + and t1.c IS NOT NULL and t2.c IS NOT NULL + and t2.b IS NOT NULL and t1.b IS NOT NULL + and t1.a IS NOT NULL and t2.a IS NOT NULL; + QUERY PLAN +--------------------------------------------------------------------------------------- + Nested Loop + Join Filter: (t1.a = t2.a) + -> Seq Scan on sl t1 + Filter: ((c IS NOT NULL) AND (b IS NOT NULL) AND (a IS NOT NULL) AND (b = 1)) + -> Seq Scan on sl t2 + Filter: ((c IS NOT NULL) AND (b IS NOT NULL) AND (a IS NOT NULL) AND (b = 2)) +(6 rows) + +explain (verbose, costs off) +select * from sl t1, sl t2 +where t1.b = t2.b and t2.a = 3 and t1.a = 3 + and t1.c IS NOT NULL and t2.c IS NOT NULL + and t2.b IS NOT NULL and t1.b IS NOT NULL + and t1.a IS NOT NULL and t2.a IS NOT NULL; + QUERY PLAN +--------------------------------------------------------------------------------------------- + Seq Scan on public.sl t2 + Output: t2.a, t2.b, t2.c, t2.a, t2.b, t2.c + Filter: ((t2.c IS NOT NULL) AND (t2.b IS NOT NULL) AND (t2.a IS NOT NULL) AND (t2.a = 3)) +(3 rows) + +-- Join qual isn't mergejoinable, but inner is unique. +EXPLAIN (COSTS OFF) +SELECT n2.a FROM sj n1, sj n2 WHERE n1.a <> n2.a AND n2.a = 1; + QUERY PLAN +------------------------------- + Nested Loop + Join Filter: (n1.a <> n2.a) + -> Seq Scan on sj n2 + Filter: (a = 1) + -> Seq Scan on sj n1 +(5 rows) + +EXPLAIN (COSTS OFF) +SELECT * FROM +(SELECT n2.a FROM sj n1, sj n2 WHERE n1.a <> n2.a) q0, sl +WHERE q0.a = 1; + QUERY PLAN +------------------------------- + Nested Loop + Join Filter: (n1.a <> n2.a) + -> Nested Loop + -> Seq Scan on sl + -> Seq Scan on sj n2 + Filter: (a = 1) + -> Seq Scan on sj n1 +(7 rows) + +-- Check optimization disabling if it will violate special join conditions. +-- Two identical joined relations satisfies self join removal conditions but +-- stay in different special join infos. +CREATE TABLE sj_t1 (id serial, a int); +CREATE TABLE sj_t2 (id serial, a int); +CREATE TABLE sj_t3 (id serial, a int); +CREATE TABLE sj_t4 (id serial, a int); +CREATE UNIQUE INDEX ON sj_t3 USING btree (a,id); +CREATE UNIQUE INDEX ON sj_t2 USING btree (id); +EXPLAIN (COSTS OFF) +SELECT * FROM sj_t1 +JOIN ( + SELECT sj_t2.id AS id FROM sj_t2 + WHERE EXISTS + ( + SELECT TRUE FROM sj_t3,sj_t4 WHERE sj_t3.a = 1 AND sj_t3.id = sj_t2.id + ) + ) t2t3t4 +ON sj_t1.id = t2t3t4.id +JOIN ( + SELECT sj_t2.id AS id FROM sj_t2 + WHERE EXISTS + ( + SELECT TRUE FROM sj_t3,sj_t4 WHERE sj_t3.a = 1 AND sj_t3.id = sj_t2.id + ) + ) _t2t3t4 +ON sj_t1.id = _t2t3t4.id; + QUERY PLAN +------------------------------------------------------------------------------------- + Nested Loop + Join Filter: (sj_t3.id = sj_t1.id) + -> Nested Loop + Join Filter: (sj_t2.id = sj_t3.id) + -> Nested Loop Semi Join + -> Nested Loop + -> HashAggregate + Group Key: sj_t3.id + -> Nested Loop + -> Seq Scan on sj_t4 + -> Materialize + -> Bitmap Heap Scan on sj_t3 + Recheck Cond: (a = 1) + -> Bitmap Index Scan on sj_t3_a_id_idx + Index Cond: (a = 1) + -> Index Only Scan using sj_t2_id_idx on sj_t2 sj_t2_1 + Index Cond: (id = sj_t3.id) + -> Nested Loop + -> Index Only Scan using sj_t3_a_id_idx on sj_t3 sj_t3_1 + Index Cond: ((a = 1) AND (id = sj_t3.id)) + -> Seq Scan on sj_t4 sj_t4_1 + -> Index Only Scan using sj_t2_id_idx on sj_t2 + Index Cond: (id = sj_t2_1.id) + -> Seq Scan on sj_t1 +(24 rows) + +-- +-- Test RowMarks-related code +-- +-- Both sides have explicit LockRows marks +EXPLAIN (COSTS OFF) +SELECT a1.a FROM sj a1,sj a2 WHERE (a1.a=a2.a) FOR UPDATE; + QUERY PLAN +--------------------------------- + LockRows + -> Seq Scan on sj a2 + Filter: (a IS NOT NULL) +(3 rows) + +reset enable_hashjoin; +reset enable_mergejoin; -- -- Test hints given on incorrect column references are useful -- diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out index 729620de..9da8c501 100644 --- a/src/test/regress/expected/sysviews.out +++ b/src/test/regress/expected/sysviews.out @@ -153,10 +153,11 @@ select name, setting from pg_settings where name like 'enable%'; enable_partitionwise_aggregate | off enable_partitionwise_join | off enable_presorted_aggregate | on + enable_self_join_removal | on enable_seqscan | on enable_sort | on enable_tidscan | on -(22 rows) +(23 rows) -- There are always wait event descriptions for various types. InjectionPoint -- may be present or absent, depending on history since last postmaster start. diff --git a/src/test/regress/sql/equivclass.sql b/src/test/regress/sql/equivclass.sql index 247b0a31..77dd964e 100644 --- a/src/test/regress/sql/equivclass.sql +++ b/src/test/regress/sql/equivclass.sql @@ -259,6 +259,22 @@ drop user regress_user_ectest; explain (costs off) select * from tenk1 where unique1 = unique1 and unique2 = unique2; +-- Test that broken ECs are processed correctly during self join removal. +-- Disable merge joins so that we don't get an error about missing commutator. +-- Test both orientations of the join clause, because only one of them breaks +-- the EC. +set enable_mergejoin to off; + +explain (costs off) + select * from ec0 m join ec0 n on m.ff = n.ff + join ec1 p on m.ff + n.ff = p.f1; + +explain (costs off) + select * from ec0 m join ec0 n on m.ff = n.ff + join ec1 p on p.f1::int8 = (m.ff + n.ff)::int8alias1; + +reset enable_mergejoin; + -- this could be converted, but isn't at present explain (costs off) select * from tenk1 where unique1 = unique1 or unique2 = unique2; diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql index d81ff63b..3e94e0af 100644 --- a/src/test/regress/sql/join.sql +++ b/src/test/regress/sql/join.sql @@ -2345,6 +2345,492 @@ select * from select * from int8_tbl x join (int4_tbl x cross join int4_tbl y(ff)) j on q1 = f1; -- ok +-- +-- test that semi- or inner self-joins on a unique column are removed +-- + +-- enable only nestloop to get more predictable plans +set enable_hashjoin to off; +set enable_mergejoin to off; + +create table sj (a int unique, b int, c int unique); +insert into sj values (1, null, 2), (null, 2, null), (2, 1, 1); +analyze sj; + +-- Trivial self-join case. +explain (costs off) +select p.* from sj p, sj q where q.a = p.a and q.b = q.a - 1; +select p.* from sj p, sj q where q.a = p.a and q.b = q.a - 1; + +-- Self-join removal performs after a subquery pull-up process and could remove +-- such kind of self-join too. Check this option. +explain (costs off) +select * from sj p +where exists (select * from sj q + where q.a = p.a and q.b < 10); +select * from sj p +where exists (select * from sj q + where q.a = p.a and q.b < 10); + +-- Don't remove self-join for the case of equality of two different unique columns. +explain (costs off) +select * from sj t1, sj t2 where t1.a = t2.c and t1.b is not null; + +-- Ensure that relations with TABLESAMPLE clauses are not considered as +-- candidates to be removed +explain (costs off) +select * from sj t1 + join lateral + (select * from sj tablesample system(t1.b)) s + on t1.a = s.a; + +-- Ensure that SJE does not form a self-referential lateral dependency +explain (costs off) +select * from sj t1 + left join lateral + (select t1.a as t1a, * from sj t2) s + on true +where t1.a = s.a; + +-- Degenerated case. +explain (costs off) +select * from + (select a as x from sj where false) as q1, + (select a as y from sj where false) as q2 +where q1.x = q2.y; + +-- We can't use a cross-EC generated self join qual because of current logic of +-- the generate_join_implied_equalities routine. +explain (costs off) +select * from sj t1, sj t2 where t1.a = t1.b and t1.b = t2.b and t2.b = t2.a; +explain (costs off) +select * from sj t1, sj t2, sj t3 +where t1.a = t1.b and t1.b = t2.b and t2.b = t2.a and + t1.b = t3.b and t3.b = t3.a; + +-- Double self-join removal. +-- Use a condition on "b + 1", not on "b", for the second join, so that +-- the equivalence class is different from the first one, and we can +-- test the non-ec code path. +explain (costs off) +select * +from sj t1 + join sj t2 on t1.a = t2.a and t1.b = t2.b + join sj t3 on t2.a = t3.a and t2.b + 1 = t3.b + 1; + +-- subselect that references the removed relation +explain (costs off) +select t1.a, (select a from sj where a = t2.a and a = t1.a) +from sj t1, sj t2 +where t1.a = t2.a; + +-- self-join under outer join +explain (costs off) +select * from sj x join sj y on x.a = y.a +left join int8_tbl z on x.a = z.q1; + +explain (costs off) +select * from sj x join sj y on x.a = y.a +left join int8_tbl z on y.a = z.q1; + +explain (costs off) +select * from ( + select t1.*, t2.a as ax from sj t1 join sj t2 + on (t1.a = t2.a and t1.c * t1.c = t2.c + 2 and t2.b is null) +) as q1 +left join + (select t3.* from sj t3, sj t4 where t3.c = t4.c) as q2 +on q1.ax = q2.a; + +-- Test that placeholders are updated correctly after join removal +explain (costs off) +select * from (values (1)) x +left join (select coalesce(y.q1, 1) from int8_tbl y + right join sj j1 inner join sj j2 on j1.a = j2.a + on true) z +on true; + +-- Test that references to the removed rel in lateral subqueries are replaced +-- correctly after join removal +explain (verbose, costs off) +select t3.a from sj t1 + join sj t2 on t1.a = t2.a + join lateral (select t1.a offset 0) t3 on true; + +explain (verbose, costs off) +select t3.a from sj t1 + join sj t2 on t1.a = t2.a + join lateral (select * from (select t1.a offset 0) offset 0) t3 on true; + +explain (verbose, costs off) +select t4.a from sj t1 + join sj t2 on t1.a = t2.a + join lateral (select t3.a from sj t3, (select t1.a) offset 0) t4 on true; + +-- Check updating of Lateral links from top-level query to the removing relation +explain (COSTS OFF) +SELECT * FROM pg_am am WHERE am.amname IN ( + SELECT c1.relname AS relname + FROM pg_class c1 + JOIN pg_class c2 + ON c1.oid=c2.oid AND c1.oid < 10 +); + +-- +-- SJE corner case: uniqueness of an inner is [partially] derived from +-- baserestrictinfo clauses. +-- XXX: We really should allow SJE for these corner cases? +-- + +INSERT INTO sj VALUES (3, 1, 3); + +-- Don't remove SJ +EXPLAIN (COSTS OFF) +SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 2 AND j2.a = 3; +-- Return one row +SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 2 AND j2.a = 3; + +-- Remove SJ, define uniqueness by a constant +EXPLAIN (COSTS OFF) +SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 2 AND j2.a = 2; +-- Return one row +SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 2 AND j2.a = 2; + +-- Remove SJ, define uniqueness by a constant expression +EXPLAIN (COSTS OFF) +SELECT * FROM sj j1, sj j2 +WHERE j1.b = j2.b + AND j1.a = (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int + AND (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int = j2.a; +-- Return one row +SELECT * FROM sj j1, sj j2 +WHERE j1.b = j2.b + AND j1.a = (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int + AND (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int = j2.a; + +-- Remove SJ +EXPLAIN (COSTS OFF) +SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 1 AND j2.a = 1; +-- Return no rows +SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 1 AND j2.a = 1; + +-- Shuffle a clause. Remove SJ +EXPLAIN (COSTS OFF) +SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND 1 = j1.a AND j2.a = 1; +-- Return no rows +SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND 1 = j1.a AND j2.a = 1; + +-- SJE Corner case: a 'a.x=a.x' clause, have replaced with 'a.x IS NOT NULL' +-- after SJ elimination it shouldn't be a mergejoinable clause. +EXPLAIN (COSTS OFF) +SELECT t4.* +FROM (SELECT t1.*, t2.a AS a1 FROM sj t1, sj t2 WHERE t1.b = t2.b) AS t3 +JOIN sj t4 ON (t4.a = t3.a) WHERE t3.a1 = 42; +SELECT t4.* +FROM (SELECT t1.*, t2.a AS a1 FROM sj t1, sj t2 WHERE t1.b = t2.b) AS t3 +JOIN sj t4 ON (t4.a = t3.a) WHERE t3.a1 = 42; + +-- Functional index +CREATE UNIQUE INDEX sj_fn_idx ON sj((a * a)); + +-- Remove SJ +EXPLAIN (COSTS OFF) +SELECT * FROM sj j1, sj j2 + WHERE j1.b = j2.b AND j1.a*j1.a = 1 AND j2.a*j2.a = 1; +-- Don't remove SJ +EXPLAIN (COSTS OFF) +SELECT * FROM sj j1, sj j2 + WHERE j1.b = j2.b AND j1.a*j1.a = 1 AND j2.a*j2.a = 2; + +-- Restriction contains expressions in both sides, Remove SJ. +EXPLAIN (COSTS OFF) +SELECT * FROM sj j1, sj j2 +WHERE j1.b = j2.b + AND (j1.a*j1.a) = (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int + AND (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int = (j2.a*j2.a); +-- Empty set of rows should be returned +SELECT * FROM sj j1, sj j2 +WHERE j1.b = j2.b + AND (j1.a*j1.a) = (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int + AND (EXTRACT(DOW FROM current_timestamp(0))/15 + 3)::int = (j2.a*j2.a); + +-- Restriction contains volatile function - disable SJE feature. +EXPLAIN (COSTS OFF) +SELECT * FROM sj j1, sj j2 +WHERE j1.b = j2.b + AND (j1.a*j1.c/3) = (random()/3 + 3)::int + AND (random()/3 + 3)::int = (j2.a*j2.c/3); +-- Return one row +SELECT * FROM sj j1, sj j2 +WHERE j1.b = j2.b + AND (j1.a*j1.c/3) = (random()/3 + 3)::int + AND (random()/3 + 3)::int = (j2.a*j2.c/3); + +-- Multiple filters +CREATE UNIQUE INDEX sj_temp_idx1 ON sj(a,b,c); + +-- Remove SJ +EXPLAIN (COSTS OFF) +SELECT * FROM sj j1, sj j2 + WHERE j1.b = j2.b AND j1.a = 2 AND j1.c = 3 AND j2.a = 2 AND 3 = j2.c; + +-- Don't remove SJ +EXPLAIN (COSTS OFF) + SELECT * FROM sj j1, sj j2 + WHERE j1.b = j2.b AND 2 = j1.a AND j1.c = 3 AND j2.a = 1 AND 3 = j2.c; + +CREATE UNIQUE INDEX sj_temp_idx ON sj(a,b); + +-- Don't remove SJ +EXPLAIN (COSTS OFF) +SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND j1.a = 2; + +-- Don't remove SJ +EXPLAIN (COSTS OFF) +SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND 2 = j2.a; + +-- Don't remove SJ +EXPLAIN (COSTS OFF) +SELECT * FROM sj j1, sj j2 WHERE j1.b = j2.b AND (j1.a = 1 OR j2.a = 1); + +DROP INDEX sj_fn_idx, sj_temp_idx1, sj_temp_idx; + +-- Test that OR predicated are updated correctly after join removal +CREATE TABLE tab_with_flag ( id INT PRIMARY KEY, is_flag SMALLINT); +CREATE INDEX idx_test_is_flag ON tab_with_flag (is_flag); + +EXPLAIN (COSTS OFF) +SELECT COUNT(*) FROM tab_with_flag +WHERE + (is_flag IS NULL OR is_flag = 0) + AND id IN (SELECT id FROM tab_with_flag WHERE id IN (2, 3)); +DROP TABLE tab_with_flag; + +-- HAVING clause +explain (costs off) +select p.b from sj p join sj q on p.a = q.a group by p.b having sum(p.a) = 1; + +-- update lateral references and range table entry reference +explain (verbose, costs off) +select 1 from (select x.* from sj x, sj y where x.a = y.a) q, + lateral generate_series(1, q.a) gs(i); + +explain (verbose, costs off) +select 1 from (select y.* from sj x, sj y where x.a = y.a) q, + lateral generate_series(1, q.a) gs(i); + +-- Test that a non-EC-derived join clause is processed correctly. Use an +-- outer join so that we can't form an EC. +explain (costs off) select * from sj p join sj q on p.a = q.a + left join sj r on p.a + q.a = r.a; + +-- FIXME this constant false filter doesn't look good. Should we merge +-- equivalence classes? +explain (costs off) +select * from sj p, sj q where p.a = q.a and p.b = 1 and q.b = 2; + +-- Check that attr_needed is updated correctly after self-join removal. In this +-- test, the join of j1 with j2 is removed. k1.b is required at either j1 or j2. +-- If this info is lost, join targetlist for (k1, k2) will not contain k1.b. +-- Use index scan for k1 so that we don't get 'b' from physical tlist used for +-- seqscan. Also disable reordering of joins because this test depends on a +-- particular join tree. +create table sk (a int, b int); +create index on sk(a); +set join_collapse_limit to 1; +set enable_seqscan to off; +explain (costs off) select 1 from + (sk k1 join sk k2 on k1.a = k2.a) + join (sj j1 join sj j2 on j1.a = j2.a) on j1.b = k1.b; +explain (costs off) select 1 from + (sk k1 join sk k2 on k1.a = k2.a) + join (sj j1 join sj j2 on j1.a = j2.a) on j2.b = k1.b; +reset join_collapse_limit; +reset enable_seqscan; + +-- Check that clauses from the join filter list is not lost on the self-join removal +CREATE TABLE emp1 (id SERIAL PRIMARY KEY NOT NULL, code int); +EXPLAIN (VERBOSE, COSTS OFF) +SELECT * FROM emp1 e1, emp1 e2 WHERE e1.id = e2.id AND e2.code <> e1.code; + +-- Shuffle self-joined relations. Only in the case of iterative deletion +-- attempts explains of these queries will be identical. +CREATE UNIQUE INDEX ON emp1((id*id)); + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM emp1 c1, emp1 c2, emp1 c3 +WHERE c1.id=c2.id AND c1.id*c2.id=c3.id*c3.id; + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM emp1 c1, emp1 c2, emp1 c3 +WHERE c1.id=c3.id AND c1.id*c3.id=c2.id*c2.id; + +EXPLAIN (COSTS OFF) +SELECT count(*) FROM emp1 c1, emp1 c2, emp1 c3 +WHERE c3.id=c2.id AND c3.id*c2.id=c1.id*c1.id; + +-- Check the usage of a parse tree by the set operations (bug #18170) +EXPLAIN (COSTS OFF) +SELECT c1.code FROM emp1 c1 LEFT JOIN emp1 c2 ON c1.id = c2.id +WHERE c2.id IS NOT NULL +EXCEPT ALL +SELECT c3.code FROM emp1 c3; + +-- Check that SJE removes references from PHVs correctly +explain (costs off) +select * from emp1 t1 left join + (select coalesce(t3.code, 1) from emp1 t2 + left join (emp1 t3 join emp1 t4 on t3.id = t4.id) + on true) +on true; + +-- Check that SJE removes the whole PHVs correctly +explain (verbose, costs off) +select 1 from emp1 t1 left join + ((select 1 as x, * from emp1 t2) s1 inner join + (select * from emp1 t3) s2 on s1.id = s2.id) + on true +where s1.x = 1; + +-- Check that PHVs do not impose any constraints on removing self joins +explain (verbose, costs off) +select * from emp1 t1 join emp1 t2 on t1.id = t2.id left join + lateral (select t1.id as t1id, * from generate_series(1,1) t3) s on true; + +explain (verbose, costs off) +select * from generate_series(1,10) t1(id) left join + lateral (select t1.id as t1id, t2.id from emp1 t2 join emp1 t3 on t2.id = t3.id) +on true; + +-- Check that SJE replaces join clauses involving the removed rel correctly +explain (costs off) +select * from emp1 t1 + inner join emp1 t2 on t1.id = t2.id + left join emp1 t3 on t1.id > 1 and t1.id < 2; + +-- Check that SJE doesn't replace the target relation +EXPLAIN (COSTS OFF) +WITH t1 AS (SELECT * FROM emp1) +UPDATE emp1 SET code = t1.code + 1 FROM t1 +WHERE t1.id = emp1.id RETURNING emp1.id, emp1.code, t1.code; + +INSERT INTO emp1 VALUES (1, 1), (2, 1); + +WITH t1 AS (SELECT * FROM emp1) +UPDATE emp1 SET code = t1.code + 1 FROM t1 +WHERE t1.id = emp1.id RETURNING emp1.id, emp1.code, t1.code; + +TRUNCATE emp1; + +EXPLAIN (COSTS OFF) +UPDATE sj sq SET b = 1 FROM sj as sz WHERE sq.a = sz.a; + +CREATE RULE sj_del_rule AS ON DELETE TO sj + DO INSTEAD + UPDATE sj SET a = 1 WHERE a = old.a; +EXPLAIN (COSTS OFF) DELETE FROM sj; +DROP RULE sj_del_rule ON sj CASCADE; + +-- Check that SJE does not mistakenly omit qual clauses (bug #18187) +insert into emp1 values (1, 1); +explain (costs off) +select 1 from emp1 full join + (select * from emp1 t1 join + emp1 t2 join emp1 t3 on t2.id = t3.id + on true + where false) s on true +where false; +select 1 from emp1 full join + (select * from emp1 t1 join + emp1 t2 join emp1 t3 on t2.id = t3.id + on true + where false) s on true +where false; + +-- Check that SJE does not mistakenly re-use knowledge of relation uniqueness +-- made with different set of quals +insert into emp1 values (2, 1); +explain (costs off) +select * from emp1 t1 where exists (select * from emp1 t2 + where t2.id = t1.code and t2.code > 0); +select * from emp1 t1 where exists (select * from emp1 t2 + where t2.id = t1.code and t2.code > 0); + +-- We can remove the join even if we find the join can't duplicate rows and +-- the base quals of each side are different. In the following case we end up +-- moving quals over to s1 to make it so it can't match any rows. +create table sl(a int, b int, c int); +create unique index on sl(a, b); +vacuum analyze sl; + +-- Both sides are unique, but base quals are different +explain (costs off) +select * from sl t1, sl t2 where t1.a = t2.a and t1.b = 1 and t2.b = 2; + +-- Check NullTest in baserestrictinfo list +explain (costs off) +select * from sl t1, sl t2 +where t1.a = t2.a and t1.b = 1 and t2.b = 2 + and t1.c IS NOT NULL and t2.c IS NOT NULL + and t2.b IS NOT NULL and t1.b IS NOT NULL + and t1.a IS NOT NULL and t2.a IS NOT NULL; +explain (verbose, costs off) +select * from sl t1, sl t2 +where t1.b = t2.b and t2.a = 3 and t1.a = 3 + and t1.c IS NOT NULL and t2.c IS NOT NULL + and t2.b IS NOT NULL and t1.b IS NOT NULL + and t1.a IS NOT NULL and t2.a IS NOT NULL; + +-- Join qual isn't mergejoinable, but inner is unique. +EXPLAIN (COSTS OFF) +SELECT n2.a FROM sj n1, sj n2 WHERE n1.a <> n2.a AND n2.a = 1; + +EXPLAIN (COSTS OFF) +SELECT * FROM +(SELECT n2.a FROM sj n1, sj n2 WHERE n1.a <> n2.a) q0, sl +WHERE q0.a = 1; + +-- Check optimization disabling if it will violate special join conditions. +-- Two identical joined relations satisfies self join removal conditions but +-- stay in different special join infos. +CREATE TABLE sj_t1 (id serial, a int); +CREATE TABLE sj_t2 (id serial, a int); +CREATE TABLE sj_t3 (id serial, a int); +CREATE TABLE sj_t4 (id serial, a int); + +CREATE UNIQUE INDEX ON sj_t3 USING btree (a,id); +CREATE UNIQUE INDEX ON sj_t2 USING btree (id); + +EXPLAIN (COSTS OFF) +SELECT * FROM sj_t1 +JOIN ( + SELECT sj_t2.id AS id FROM sj_t2 + WHERE EXISTS + ( + SELECT TRUE FROM sj_t3,sj_t4 WHERE sj_t3.a = 1 AND sj_t3.id = sj_t2.id + ) + ) t2t3t4 +ON sj_t1.id = t2t3t4.id +JOIN ( + SELECT sj_t2.id AS id FROM sj_t2 + WHERE EXISTS + ( + SELECT TRUE FROM sj_t3,sj_t4 WHERE sj_t3.a = 1 AND sj_t3.id = sj_t2.id + ) + ) _t2t3t4 +ON sj_t1.id = _t2t3t4.id; + +-- +-- Test RowMarks-related code +-- + +-- Both sides have explicit LockRows marks +EXPLAIN (COSTS OFF) +SELECT a1.a FROM sj a1,sj a2 WHERE (a1.a=a2.a) FOR UPDATE; + +reset enable_hashjoin; +reset enable_mergejoin; + -- -- Test hints given on incorrect column references are useful -- diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list index 635e6d6e..74091ba4 100644 --- a/src/tools/pgindent/typedefs.list +++ b/src/tools/pgindent/typedefs.list @@ -390,6 +390,7 @@ CatalogId CatalogIdMapEntry CatalogIndexState ChangeVarNodes_context +ReplaceVarnoContext CheckPoint CheckPointStmt CheckpointStatsData @@ -2565,6 +2566,7 @@ SeenRelsEntry SelectLimit SelectStmt Selectivity +SelfJoinCandidate SemTPadded SemiAntiJoinFactors SeqScan @@ -3997,6 +3999,7 @@ unicodeStyleColumnFormat unicodeStyleFormat unicodeStyleRowFormat unicode_linestyle +UniqueRelInfo unit_conversion unlogged_relation_entry utf_local_conversion_func -- 2.34.1