diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 7b52dad..f5c5562 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -124,6 +124,9 @@ static double preprocess_limit(PlannerInfo *root, int64 *offset_est, int64 *count_est); static bool limit_needed(Query *parse); static void remove_useless_groupby_columns(PlannerInfo *root); +static void remove_useless_distinct_columns(PlannerInfo *root); +static List *remove_functionally_dependant_clauses(PlannerInfo *root, + List *clauselist); static List *preprocess_groupclause(PlannerInfo *root, List *force); static List *extract_rollup_sets(List *groupingSets); static List *reorder_grouping_sets(List *groupingSets, List *sortclause); @@ -870,6 +873,9 @@ subquery_planner(PlannerGlobal *glob, Query *parse, /* Remove any redundant GROUP BY columns */ remove_useless_groupby_columns(root); + /* Likewise for redundant DISTINCT columns */ + remove_useless_distinct_columns(root); + /* * If we have any outer joins, try to reduce them to plain inner joins. 
* This step is most easily done after we've done expression @@ -2827,10 +2833,6 @@ static void remove_useless_groupby_columns(PlannerInfo *root) { Query *parse = root->parse; - Bitmapset **groupbyattnos; - Bitmapset **surplusvars; - ListCell *lc; - int relid; /* No chance to do anything if there are less than two GROUP BY items */ if (list_length(parse->groupClause) < 2) @@ -2840,14 +2842,54 @@ remove_useless_groupby_columns(PlannerInfo *root) if (parse->groupingSets) return; + parse->groupClause = remove_functionally_dependant_clauses(root, + parse->groupClause); +} + +/* + * remove_useless_distinct_columns + * Similar to remove_useless_groupby_columns but for the DISTINCT clause + */ +static void +remove_useless_distinct_columns(PlannerInfo *root) +{ + Query *parse = root->parse; + + /* No chance to do anything if there are fewer than two DISTINCT items */ + if (list_length(parse->distinctClause) < 2) + return; + + parse->distinctClause = remove_functionally_dependant_clauses(root, + parse->distinctClause); +} + +/* + * remove_functionally_dependant_clauses + * Processes clauselist and removes any items which are deemed to be + * functionally dependent on other clauselist items. + * + * If any item from the list can be removed, then a new list is built which + * does not contain the removed items. If no item can be removed then the + * original list is returned. + */ +static List * +remove_functionally_dependant_clauses(PlannerInfo *root, + List *clauselist) +{ + Query *parse = root->parse; + Bitmapset **clauseattnos; + Bitmapset **surplusvars; + ListCell *lc; + int relid; + /* - * Scan the GROUP BY clause to find GROUP BY items that are simple Vars. - * Fill groupbyattnos[k] with a bitmapset of the column attnos of RTE k - * that are GROUP BY items. + * Scan the clauselist to find items that are simple Vars. Fill + * clauseattnos[k] with a bitmapset of the column attnos of RTE k that are + * in the clauselist. 
*/ - groupbyattnos = (Bitmapset **) palloc0(sizeof(Bitmapset *) * + clauseattnos = (Bitmapset **) palloc0(sizeof(Bitmapset *) * (list_length(parse->rtable) + 1)); - foreach(lc, parse->groupClause) + foreach(lc, clauselist) { SortGroupClause *sgc = lfirst_node(SortGroupClause, lc); TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList); @@ -2857,9 +2899,9 @@ remove_useless_groupby_columns(PlannerInfo *root) * Ignore non-Vars and Vars from other query levels. * * XXX in principle, stable expressions containing Vars could also be - * removed, if all the Vars are functionally dependent on other GROUP - * BY items. But it's not clear that such cases occur often enough to - * be worth troubling over. + * removed, if all the Vars are functionally dependent on other items + * in the clauselist. But it's not clear that such cases occur often + * enough to be worth troubling over. */ if (!IsA(var, Var) || var->varlevelsup > 0) @@ -2868,15 +2910,16 @@ remove_useless_groupby_columns(PlannerInfo *root) /* OK, remember we have this Var */ relid = var->varno; Assert(relid <= list_length(parse->rtable)); - groupbyattnos[relid] = bms_add_member(groupbyattnos[relid], - var->varattno - FirstLowInvalidHeapAttributeNumber); + clauseattnos[relid] = bms_add_member(clauseattnos[relid], + var->varattno - FirstLowInvalidHeapAttributeNumber); } /* * Consider each relation and see if it is possible to remove some of its - * Vars from GROUP BY. For simplicity and speed, we do the actual removal - * in a separate pass. Here, we just fill surplusvars[k] with a bitmapset - * of the column attnos of RTE k that are removable GROUP BY items. + * Vars from the clauselist. For simplicity and speed, we do the actual + * removal in a separate pass. Here, we just fill surplusvars[k] with a + * bitmapset of the column attnos of RTE k that are removable clauselist + * items. 
*/ surplusvars = NULL; /* don't allocate array unless required */ relid = 0; @@ -2893,8 +2936,8 @@ remove_useless_groupby_columns(PlannerInfo *root) if (rte->rtekind != RTE_RELATION) continue; - /* Nothing to do unless this rel has multiple Vars in GROUP BY */ - relattnos = groupbyattnos[relid]; + /* Nothing to do unless this rel has multiple Vars in clauselist */ + relattnos = clauseattnos[relid]; if (bms_membership(relattnos) != BMS_MULTIPLE) continue; @@ -2908,7 +2951,7 @@ remove_useless_groupby_columns(PlannerInfo *root) /* * If the primary key is a proper subset of relattnos then we have - * some items in the GROUP BY that can be removed. + * some items in the clauselist that can be removed. */ if (bms_subset_compare(pkattnos, relattnos) == BMS_SUBSET1) { @@ -2930,15 +2973,15 @@ remove_useless_groupby_columns(PlannerInfo *root) } /* - * If we found any surplus Vars, build a new GROUP BY clause without them. + * If we found any surplus Vars, build a new clause list without them. * (Note: this may leave some TLEs with unreferenced ressortgroupref * markings, but that's harmless.) 
*/ if (surplusvars != NULL) { - List *new_groupby = NIL; + List *new_clauselist = NIL; - foreach(lc, parse->groupClause) + foreach(lc, clauselist) { SortGroupClause *sgc = lfirst_node(SortGroupClause, lc); TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList); @@ -2952,11 +2995,14 @@ remove_useless_groupby_columns(PlannerInfo *root) var->varlevelsup > 0 || !bms_is_member(var->varattno - FirstLowInvalidHeapAttributeNumber, surplusvars[var->varno])) - new_groupby = lappend(new_groupby, sgc); + new_clauselist = lappend(new_clauselist, sgc); } - parse->groupClause = new_groupby; + return new_clauselist; } + + /* nothing to change, just return the old list */ + return clauselist; } /* diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out index dbce7d3..d77c2e4 100644 --- a/src/test/regress/expected/aggregates.out +++ b/src/test/regress/expected/aggregates.out @@ -1017,6 +1017,55 @@ explain (costs off) select * from t3 group by a,b,c; -> Seq Scan on t3 (3 rows) +-- +-- Test removal of redundant DISTINCT columns +-- +-- Non-primary-key columns can be removed from DISTINCT clause +explain (costs off) select distinct a,b,c,d from t1; + QUERY PLAN +---------------------- + HashAggregate + Group Key: a, b + -> Seq Scan on t1 +(3 rows) + +-- No removal can happen if the complete PK is not present in DISTINCT clause +explain (costs off) select distinct a,c,d from t1; + QUERY PLAN +---------------------- + HashAggregate + Group Key: a, c, d + -> Seq Scan on t1 +(3 rows) + +-- Test removal across multiple relations +explain (costs off) select distinct t1.a,t1.b,t1.c,t1.d,t2.x,t2.y,t2.z +from t1 inner join t2 on t1.a = t2.x and t1.b = t2.y; + QUERY PLAN +------------------------------------------------------ + HashAggregate + Group Key: t1.a, t1.b, t2.x, t2.y + -> Hash Join + Hash Cond: ((t2.x = t1.a) AND (t2.y = t1.b)) + -> Seq Scan on t2 + -> Hash + -> Seq Scan on t1 +(7 rows) + +-- Test case where t1 can be optimized but 
not t2 +explain (costs off) select distinct t1.a,t1.b,t1.c,t1.d,t2.x,t2.z +from t1 inner join t2 on t1.a = t2.x and t1.b = t2.y; + QUERY PLAN +------------------------------------------------------ + HashAggregate + Group Key: t1.a, t1.b, t2.x, t2.z + -> Hash Join + Hash Cond: ((t2.x = t1.a) AND (t2.y = t1.b)) + -> Seq Scan on t2 + -> Hash + -> Seq Scan on t1 +(7 rows) + drop table t1; drop table t2; drop table t3; diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out index 02e7d56..832e620 100644 --- a/src/test/regress/expected/join.out +++ b/src/test/regress/expected/join.out @@ -4050,18 +4050,17 @@ select d.* from d left join (select * from b group by b.id, b.c_id) s explain (costs off) select d.* from d left join (select distinct * from b) s on d.a = s.id; - QUERY PLAN --------------------------------------- - Merge Right Join - Merge Cond: (b.id = d.a) - -> Unique - -> Sort - Sort Key: b.id, b.c_id - -> Seq Scan on b - -> Sort - Sort Key: d.a - -> Seq Scan on d -(9 rows) + QUERY PLAN +--------------------------------------- + Hash Left Join + Hash Cond: (d.a = s.id) + -> Seq Scan on d + -> Hash + -> Subquery Scan on s + -> HashAggregate + Group Key: b.id + -> Seq Scan on b +(8 rows) -- check join removal works when uniqueness of the join condition is enforced -- by a UNION diff --git a/src/test/regress/sql/aggregates.sql b/src/test/regress/sql/aggregates.sql index 6c9b86a..230c575 100644 --- a/src/test/regress/sql/aggregates.sql +++ b/src/test/regress/sql/aggregates.sql @@ -362,6 +362,23 @@ group by t1.a,t1.b,t1.c,t1.d,t2.x,t2.z; -- Cannot optimize when PK is deferrable explain (costs off) select * from t3 group by a,b,c; +-- +-- Test removal of redundant DISTINCT columns +-- +-- Non-primary-key columns can be removed from DISTINCT clause +explain (costs off) select distinct a,b,c,d from t1; + +-- No removal can happen if the complete PK is not present in DISTINCT clause +explain (costs off) select distinct a,c,d from t1; + +-- 
Test removal across multiple relations +explain (costs off) select distinct t1.a,t1.b,t1.c,t1.d,t2.x,t2.y,t2.z +from t1 inner join t2 on t1.a = t2.x and t1.b = t2.y; + +-- Test case where t1 can be optimized but not t2 +explain (costs off) select distinct t1.a,t1.b,t1.c,t1.d,t2.x,t2.z +from t1 inner join t2 on t1.a = t2.x and t1.b = t2.y; + drop table t1; drop table t2; drop table t3;