diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c index 0276f45..3ccf713 100644 --- a/src/backend/commands/explain.c +++ b/src/backend/commands/explain.c @@ -960,6 +960,10 @@ ExplainNode(PlanState *planstate, List *ancestors, pname = "GroupAggregate"; strategy = "Sorted"; break; + case AGG_CHAINED: + pname = "ChainAggregate"; + strategy = "Chained"; + break; case AGG_HASHED: pname = "HashAggregate"; strategy = "Hashed"; diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c index ad8a3d0..0ac2e70 100644 --- a/src/backend/executor/execUtils.c +++ b/src/backend/executor/execUtils.c @@ -151,6 +151,7 @@ CreateExecutorState(void) estate->es_epqTupleSet = NULL; estate->es_epqScanDone = NULL; + estate->agg_chain_head = NULL; /* * Return the executor state structure */ diff --git a/src/backend/executor/nodeAgg.c b/src/backend/executor/nodeAgg.c index beecd36..48567b9 100644 --- a/src/backend/executor/nodeAgg.c +++ b/src/backend/executor/nodeAgg.c @@ -326,6 +326,7 @@ static void build_hash_table(AggState *aggstate); static AggHashEntry lookup_hash_entry(AggState *aggstate, TupleTableSlot *inputslot); static TupleTableSlot *agg_retrieve_direct(AggState *aggstate); +static TupleTableSlot *agg_retrieve_chained(AggState *aggstate); static void agg_fill_hash_table(AggState *aggstate); static TupleTableSlot *agg_retrieve_hash_table(AggState *aggstate); static Datum GetAggInitVal(Datum textInitVal, Oid transtype); @@ -1119,6 +1120,8 @@ lookup_hash_entry(AggState *aggstate, TupleTableSlot *inputslot) TupleTableSlot * ExecAgg(AggState *node) { + TupleTableSlot *result; + /* * Check to see if we're still projecting out tuples from a previous agg * tuple (because there is a function-returning-set in the projection @@ -1126,7 +1129,6 @@ ExecAgg(AggState *node) */ if (node->ss.ps.ps_TupFromTlist) { - TupleTableSlot *result; ExprDoneCond isDone; result = ExecProject(node->ss.ps.ps_ProjInfo, &isDone); @@ -1137,22 +1139,45 @@ ExecAgg(AggState *node) } /* - * Exit if nothing left to do. (We must do the ps_TupFromTlist check - * first, because in some cases agg_done gets set before we emit the final - * aggregate tuple, and we have to finish running SRFs for it.) + * (We must do the ps_TupFromTlist check first, because in some cases + * agg_done gets set before we emit the final aggregate tuple, and we have + * to finish running SRFs for it.) */ - if (node->agg_done) - return NULL; - /* Dispatch based on strategy */ - if (((Agg *) node->ss.ps.plan)->aggstrategy == AGG_HASHED) + if (!node->agg_done) { - if (!node->table_filled) - agg_fill_hash_table(node); - return agg_retrieve_hash_table(node); + /* Dispatch based on strategy */ + switch (((Agg *) node->ss.ps.plan)->aggstrategy) + { + case AGG_HASHED: + if (!node->table_filled) + agg_fill_hash_table(node); + result = agg_retrieve_hash_table(node); + break; + case AGG_CHAINED: + result = agg_retrieve_chained(node); + break; + default: + result = agg_retrieve_direct(node); + break; + } + + if (!TupIsNull(result)) + return result; } - else - return agg_retrieve_direct(node); + + if (!node->chain_done) + { + Assert(node->chain_tuplestore); + result = node->ss.ps.ps_ResultTupleSlot; + ExecClearTuple(result); + if (tuplestore_gettupleslot(node->chain_tuplestore, + true, false, result)) + return result; + node->chain_done = true; + } + + return NULL; } /* @@ -1473,6 +1498,161 @@ agg_retrieve_direct(AggState *aggstate) return NULL; } + +/* + * ExecAgg for chained case (pullthrough mode) + */ +static TupleTableSlot * +agg_retrieve_chained(AggState *aggstate) +{ + Agg *node = (Agg *) aggstate->ss.ps.plan; + ExprContext *econtext = aggstate->ss.ps.ps_ExprContext; + ExprContext *tmpcontext = aggstate->tmpcontext; + Datum *aggvalues = econtext->ecxt_aggvalues; + bool *aggnulls = econtext->ecxt_aggnulls; + AggStatePerAgg peragg = aggstate->peragg; + AggStatePerGroup pergroup = aggstate->pergroup; + TupleTableSlot *outerslot; + TupleTableSlot *firstSlot = aggstate->ss.ss_ScanTupleSlot; + int aggno; + int numGroupingSets = Max(aggstate->numsets, 1); + int currentSet = 0; + + /* + * The invariants here are: + * + * - when called, we've already projected every result that + * might have been generated by previous rows, and if this is not + * the first row, then grp_firsttuple has the representative input + * row. + * + * - we must pull the outer plan exactly once and return that tuple. If + * the outer plan ends, we project whatever needs projecting. + */ + + outerslot = ExecProcNode(outerPlanState(aggstate)); + + /* + * If this is the first row and it's empty, nothing to do. + */ + + if (TupIsNull(firstSlot) && TupIsNull(outerslot)) + { + aggstate->agg_done = true; + return outerslot; + } + + /* + * See if we need to project anything. (We don't need to worry about + * grouping sets of size 0, the planner doesn't give us those.) + */ + + econtext->ecxt_outertuple = firstSlot; + + while (!TupIsNull(firstSlot) + && (TupIsNull(outerslot) + || !execTuplesMatch(firstSlot, + outerslot, + aggstate->gset_lengths[currentSet], + node->grpColIdx, + aggstate->eqfunctions, + tmpcontext->ecxt_per_tuple_memory))) + { + aggstate->current_set = aggstate->projected_set = currentSet; + + for (aggno = 0; aggno < aggstate->numaggs; aggno++) + { + AggStatePerAgg peraggstate = &peragg[aggno]; + AggStatePerGroup pergroupstate; + + pergroupstate = &pergroup[aggno + (currentSet * (aggstate->numaggs))]; + + if (peraggstate->numSortCols > 0) + { + if (peraggstate->numInputs == 1) + process_ordered_aggregate_single(aggstate, + peraggstate, + pergroupstate); + else + process_ordered_aggregate_multi(aggstate, + peraggstate, + pergroupstate); + } + + finalize_aggregate(aggstate, peraggstate, pergroupstate, + &aggvalues[aggno], &aggnulls[aggno]); + } + + econtext->grouped_cols = aggstate->grouped_cols[currentSet]; + + /* + * Check the qual (HAVING clause); if the group does not match, ignore + * it. + */ + if (ExecQual(aggstate->ss.ps.qual, econtext, false)) + { + /* + * Form a projection tuple using the aggregate results + * and the representative input tuple. + */ + TupleTableSlot *result; + ExprDoneCond isDone; + + do + { + result = ExecProject(aggstate->ss.ps.ps_ProjInfo, &isDone); + + if (isDone != ExprEndResult) + { + tuplestore_puttupleslot(aggstate->chain_tuplestore, + result); + } + } + while (isDone == ExprMultipleResult); + } + else + InstrCountFiltered1(aggstate, 1); + + ReScanExprContext(tmpcontext); + ReScanExprContext(econtext); + ReScanExprContext(aggstate->aggcontext[currentSet]); + MemoryContextDeleteChildren(aggstate->aggcontext[currentSet]->ecxt_per_tuple_memory); + if (++currentSet >= numGroupingSets) + break; + } + + if (TupIsNull(outerslot)) + { + aggstate->agg_done = true; + return NULL; + } + + /* + * If this is the first tuple, store it and initialize everything. + * Otherwise re-init any aggregates we projected above. + */ + + if (TupIsNull(firstSlot)) + { + ExecCopySlot(firstSlot, outerslot); + initialize_aggregates(aggstate, peragg, pergroup, numGroupingSets); + } + else if (currentSet > 0) + { + ExecCopySlot(firstSlot, outerslot); + initialize_aggregates(aggstate, peragg, pergroup, currentSet); + } + + tmpcontext->ecxt_outertuple = outerslot; + + advance_aggregates(aggstate, pergroup); + + /* Reset per-input-tuple context after each tuple */ + ResetExprContext(tmpcontext); + + return outerslot; +} + /* * ExecAgg for hashed case: phase 1, read input and build hash table */ @@ -1640,6 +1820,7 @@ AggState * ExecInitAgg(Agg *node, EState *estate, int eflags) { AggState *aggstate; + AggState *save_chain_head = NULL; AggStatePerAgg peragg; Plan *outerPlan; ExprContext *econtext; @@ -1672,9 +1853,14 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) aggstate->curperagg = NULL; aggstate->agg_done = false; aggstate->input_done = false; + aggstate->chain_done = true; aggstate->pergroup = NULL; aggstate->grp_firstTuple = NULL; aggstate->hashtable = NULL; + aggstate->chain_depth = 0; + aggstate->chain_rescan = 0; + aggstate->chain_head = NULL; + aggstate->chain_tuplestore = NULL; if (node->groupingSets) { @@ -1734,6 +1920,7 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) ExecInitResultTupleSlot(estate, &aggstate->ss.ps); aggstate->hashslot = ExecInitExtraTupleSlot(estate); + /* * initialize child expressions * @@ -1743,12 +1930,40 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) * that is true, we don't need to worry about evaluating the aggs in any * particular order. */ - aggstate->ss.ps.targetlist = (List *) - ExecInitExpr((Expr *) node->plan.targetlist, - (PlanState *) aggstate); - aggstate->ss.ps.qual = (List *) - ExecInitExpr((Expr *) node->plan.qual, - (PlanState *) aggstate); + if (node->aggstrategy == AGG_CHAINED) + { + Assert(estate->agg_chain_head); + + aggstate->chain_head = estate->agg_chain_head; + aggstate->chain_head->chain_depth++; + + /* + * Snarf the real targetlist and qual from the chain head node + */ + aggstate->ss.ps.targetlist = (List *) + ExecInitExpr((Expr *) aggstate->chain_head->ss.ps.plan->targetlist, + (PlanState *) aggstate); + aggstate->ss.ps.qual = (List *) + ExecInitExpr((Expr *) aggstate->chain_head->ss.ps.plan->qual, + (PlanState *) aggstate); + } + else + { + aggstate->ss.ps.targetlist = (List *) + ExecInitExpr((Expr *) node->plan.targetlist, + (PlanState *) aggstate); + aggstate->ss.ps.qual = (List *) + ExecInitExpr((Expr *) node->plan.qual, + (PlanState *) aggstate); + } + + if (node->chain_head) + { + save_chain_head = estate->agg_chain_head; + estate->agg_chain_head = aggstate; + aggstate->chain_tuplestore = tuplestore_begin_heap(false, false, work_mem); + aggstate->chain_done = false; + } /* * initialize child nodes @@ -1761,6 +1976,11 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) outerPlan = outerPlan(node); outerPlanState(aggstate) = ExecInitNode(outerPlan, estate, eflags); + if (node->chain_head) + { + estate->agg_chain_head = save_chain_head; + } + /* * initialize source tuple type. */ @@ -1769,8 +1989,35 @@ ExecInitAgg(Agg *node, EState *estate, int eflags) /* * Initialize result tuple type and projection info. */ - ExecAssignResultTypeFromTL(&aggstate->ss.ps); - ExecAssignProjectionInfo(&aggstate->ss.ps, NULL); + if (node->aggstrategy == AGG_CHAINED) + { + PlanState *head_ps = &aggstate->chain_head->ss.ps; + bool hasoid; + + /* + * We must calculate this the same way that the chain head does, + * regardless of intermediate nodes, for consistency + */ + if (!ExecContextForcesOids(head_ps, &hasoid)) + hasoid = false; + + ExecAssignResultType(&aggstate->ss.ps, ExecGetScanType(&aggstate->ss)); + ExecSetSlotDescriptor(aggstate->hashslot, + ExecTypeFromTL(head_ps->plan->targetlist, hasoid)); + aggstate->ss.ps.ps_ProjInfo = + ExecBuildProjectionInfo(aggstate->ss.ps.targetlist, + aggstate->ss.ps.ps_ExprContext, + aggstate->hashslot, + NULL); + + aggstate->chain_tuplestore = aggstate->chain_head->chain_tuplestore; + Assert(aggstate->chain_tuplestore); + } + else + { + ExecAssignResultTypeFromTL(&aggstate->ss.ps); + ExecAssignProjectionInfo(&aggstate->ss.ps, NULL); + } aggstate->ss.ps.ps_TupFromTlist = false; @@ -2225,6 +2472,9 @@ ExecEndAgg(AggState *node) for (i = 0; i < numGroupingSets; ++i) ReScanExprContext(node->aggcontext[i]); + if (node->chain_tuplestore && !node->chain_head) + tuplestore_end(node->chain_tuplestore); + /* * We don't actually free any ExprContexts here (see comment in * ExecFreeExprContext), just unlinking the output one from the plan node @@ -2339,11 +2589,54 @@ ExecReScanAgg(AggState *node) } /* - * if chgParam of subnode is not null then plan will be re-scanned by - * first ExecProcNode. + * If we're in a chain, let the chain head know whether we + * rescanned. (This is nonsense if it happens as a result of chgParam, + * but the chain head only cares about this when rescanning explicitly + * when chgParam is empty.) + */ + + if (aggnode->aggstrategy == AGG_CHAINED) + node->chain_head->chain_rescan++; + + /* + * If we're a chain head, we reset the tuplestore if parameters changed, + * and let subplans repopulate it. + * + * If we're a chain head and the subplan parameters did NOT change, then + * whether we need to reset the tuplestore depends on whether anything + * (specifically the Sort nodes) protects the child ChainAggs from rescan. + * Since this is hard to know in advance, we have the ChainAggs signal us + * as to whether the reset is needed. (We assume that either all children + * in the chain are protected or none are; since all Sort nodes in the + * chain should have the same flags. If this changes, it would probably be + * necessary to add a signalling param to force child rescan.) */ - if (node->ss.ps.lefttree->chgParam == NULL) + if (aggnode->chain_head) + { + if (node->ss.ps.lefttree->chgParam) + tuplestore_clear(node->chain_tuplestore); + else + { + node->chain_rescan = 0; + + ExecReScan(node->ss.ps.lefttree); + + if (node->chain_rescan == node->chain_depth) + tuplestore_clear(node->chain_tuplestore); + else if (node->chain_rescan == 0) + tuplestore_rescan(node->chain_tuplestore); + else + elog(ERROR, "chained aggregate rescan depth error"); + } + node->chain_done = false; + } + else if (node->ss.ps.lefttree->chgParam == NULL) + { + + + ExecReScan(node->ss.ps.lefttree); + } } diff --git a/src/backend/nodes/copyfuncs.c b/src/backend/nodes/copyfuncs.c index cb648f8..71c9554 100644 --- a/src/backend/nodes/copyfuncs.c +++ b/src/backend/nodes/copyfuncs.c @@ -772,6 +772,7 @@ _copyAgg(const Agg *from) CopyPlanFields((const Plan *) from, (Plan *) newnode); COPY_SCALAR_FIELD(aggstrategy); + COPY_SCALAR_FIELD(chain_head); COPY_SCALAR_FIELD(numCols); if (from->numCols > 0) { diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index a9cdb95..6131fec 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -632,6 +632,7 @@ _outAgg(StringInfo str, const Agg *node) _outPlanInfo(str, (const Plan *) node); WRITE_ENUM_FIELD(aggstrategy, AggStrategy); + WRITE_BOOL_FIELD(chain_head); WRITE_INT_FIELD(numCols); appendStringInfoString(str, " :grpColIdx"); diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index 1a47f0f..96ea58f 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -1016,6 +1016,7 @@ create_unique_plan(PlannerInfo *root, UniquePath *best_path) groupColIdx, groupOperators, NIL, + false, numGroups, subplan); } @@ -4266,7 +4267,7 @@ Agg * make_agg(PlannerInfo *root, List *tlist, List *qual, AggStrategy aggstrategy, const AggClauseCosts *aggcosts, int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators, - List *groupingSets, + List *groupingSets, bool chain_head, long numGroups, Plan *lefttree) { @@ -4276,6 +4277,7 @@ make_agg(PlannerInfo *root, List *tlist, List *qual, QualCost qual_cost; node->aggstrategy = aggstrategy; + node->chain_head = chain_head; node->numCols = numGroupCols; node->grpColIdx = grpColIdx; node->grpOperators = grpOperators; @@ -4320,8 +4322,21 @@ make_agg(PlannerInfo *root, List *tlist, List *qual, } add_tlist_costs_to_plan(root, plan, tlist); - plan->qual = qual; - plan->targetlist = tlist; + if (aggstrategy == AGG_CHAINED) + { + Assert(!chain_head); + plan->plan_rows = lefttree->plan_rows; + plan->plan_width = lefttree->plan_width; + + /* supplied tlist is ignored, this is dummy */ + plan->targetlist = lefttree->targetlist; + plan->qual = NULL; + } + else + { + plan->qual = qual; + plan->targetlist = tlist; + } plan->lefttree = lefttree; plan->righttree = NULL; diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 2889a35..8fed104 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -16,6 +16,7 @@ #include "postgres.h" #include +#include #include "access/htup_details.h" #include "executor/executor.h" @@ -67,6 +68,7 @@ typedef struct { List *tlist; /* preprocessed query targetlist */ List *activeWindows; /* active windows, if any */ + List *groupClause; /* overrides parse->groupClause */ } standard_qp_extra; /* Local functions */ @@ -80,7 +82,8 @@ static double preprocess_limit(PlannerInfo *root, int64 *offset_est, int64 *count_est); static bool limit_needed(Query *parse); static List *preprocess_groupclause(PlannerInfo *root, List *force); -static List *extract_rollup_sets(List *groupingSets, List *sortclause, List **remainder); +static List *extract_rollup_sets(List *groupingSets); +static List *reorder_grouping_sets(List *groupingSets, List *sortclause); static void standard_qp_callback(PlannerInfo *root, void *extra); static bool choose_hashed_grouping(PlannerInfo *root, double tuple_fraction, double limit_tuples, @@ -1182,11 +1185,6 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) List *sub_tlist; AttrNumber *groupColIdx = NULL; bool need_tlist_eval = true; - standard_qp_extra qp_extra; - RelOptInfo *final_rel; - Path *cheapest_path; - Path *sorted_path; - Path *best_path; long numGroups = 0; AggClauseCosts agg_costs; int numGroupCols; @@ -1196,7 +1194,14 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) WindowFuncLists *wflists = NULL; List *activeWindows = NIL; int maxref = 0; - int *refmap = NULL; + List *refmaps = NIL; + List *rollup_lists = NIL; + List *rollup_groupclauses = NIL; + standard_qp_extra qp_extra; + RelOptInfo *final_rel; + Path *cheapest_path; + Path *sorted_path; + Path *best_path; MemSet(&agg_costs, 0, sizeof(AggClauseCosts)); @@ -1207,33 +1212,9 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) if (parse->groupingSets) parse->groupingSets = expand_grouping_sets(parse->groupingSets, -1); - if (parse->groupingSets) + if (parse->groupClause) { ListCell *lc; - ListCell *lc2; - int ref = 0; - List *remaining_sets = NIL; - List *usable_sets = extract_rollup_sets(parse->groupingSets, - parse->sortClause, - &remaining_sets); - - /* - * TODO - if the grouping set list can't be handled as one rollup... - */ - - if (remaining_sets != NIL) - elog(ERROR, "not implemented yet"); - - parse->groupingSets = usable_sets; - - if (parse->groupClause) - preprocess_groupclause(root, linitial(parse->groupingSets)); - - /* - * Now that we've pinned down an order for the groupClause for this - * list of grouping sets, remap the entries in the grouping sets - * from sortgrouprefs to plain indices into the groupClause. - */ foreach(lc, parse->groupClause) { @@ -1241,29 +1222,59 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) if (gc->tleSortGroupRef > maxref) maxref = gc->tleSortGroupRef; } + } - refmap = palloc0(sizeof(int) * (maxref + 1)); + if (parse->groupingSets) + { + ListCell *lc; + ListCell *lc2; + ListCell *lc_set; + List *sets = extract_rollup_sets(parse->groupingSets); - foreach(lc, parse->groupClause) + foreach(lc_set, sets) { - SortGroupClause *gc = lfirst(lc); - refmap[gc->tleSortGroupRef] = ++ref; - } + List *current_sets = reorder_grouping_sets(lfirst(lc_set), + (list_length(sets) == 1 + ? parse->sortClause + : NIL)); + List *groupclause = preprocess_groupclause(root, linitial(current_sets)); + int ref = 0; + int *refmap; - foreach(lc, usable_sets) - { - foreach(lc2, (List *) lfirst(lc)) + /* + * Now that we've pinned down an order for the groupClause for this + * list of grouping sets, remap the entries in the grouping sets + * from sortgrouprefs to plain indices into the groupClause. + */ + + refmap = palloc0(sizeof(int) * (maxref + 1)); + + foreach(lc, groupclause) { - Assert(refmap[lfirst_int(lc2)] > 0); - lfirst_int(lc2) = refmap[lfirst_int(lc2)] - 1; + SortGroupClause *gc = lfirst(lc); + refmap[gc->tleSortGroupRef] = ++ref; + } + + foreach(lc, current_sets) + { + foreach(lc2, (List *) lfirst(lc)) + { + Assert(refmap[lfirst_int(lc2)] > 0); + lfirst_int(lc2) = refmap[lfirst_int(lc2)] - 1; + } } + + rollup_lists = lcons(current_sets, rollup_lists); + rollup_groupclauses = lcons(groupclause, rollup_groupclauses); + refmaps = lcons(refmap, refmaps); } } else { /* Preprocess GROUP BY clause, if any */ if (parse->groupClause) - preprocess_groupclause(root, NIL); + parse->groupClause = preprocess_groupclause(root, NIL); + rollup_groupclauses = list_make1(parse->groupClause); } numGroupCols = list_length(parse->groupClause); @@ -1328,9 +1339,6 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) preprocess_minmax_aggregates(root, tlist); } - if (refmap) - pfree(refmap); - /* Make tuple_fraction accessible to lower-level routines */ root->tuple_fraction = tuple_fraction; @@ -1353,6 +1361,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) /* Set up data needed by standard_qp_callback */ qp_extra.tlist = tlist; qp_extra.activeWindows = activeWindows; + qp_extra.groupClause = linitial(rollup_groupclauses); /* * Generate the best unsorted and presorted paths for this Query (but @@ -1379,6 +1388,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) * to describe the fraction of the underlying un-aggregated tuples * that will be fetched. */ + dNumGroups = 1; /* in case not grouping */ if (parse->groupClause) @@ -1414,6 +1424,9 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) if (tuple_fraction >= 1.0) tuple_fraction /= dNumGroups; + if (list_length(rollup_lists) > 1) + tuple_fraction = 0.0; + /* * If both GROUP BY and ORDER BY are specified, we will need two * levels of sort --- and, therefore, certainly need to read all @@ -1437,6 +1450,8 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) * set to 1). */ tuple_fraction = 0.0; + if (parse->groupingSets) + dNumGroups = list_length(parse->groupingSets); } else if (parse->distinctClause) { @@ -1617,7 +1632,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) /* Detect if we'll need an explicit sort for grouping */ if (parse->groupClause && !use_hashed_grouping && - !pathkeys_contained_in(root->group_pathkeys, current_pathkeys)) + !pathkeys_contained_in(root->group_pathkeys, current_pathkeys)) { need_sort_for_grouping = true; @@ -1692,8 +1707,9 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) &agg_costs, numGroupCols, groupColIdx, - extract_grouping_ops(parse->groupClause), + extract_grouping_ops(parse->groupClause), NIL, + false, numGroups, result_plan); /* Hashed aggregation produces randomly-ordered results */ @@ -1701,45 +1717,94 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) } else if (parse->hasAggs || (parse->groupingSets && parse->groupClause)) { - /* Plain aggregate plan --- sort if needed */ - AggStrategy aggstrategy; + bool is_chained = false; - if (parse->groupClause) + /* + * If we need multiple grouping nodes, start stacking them up; + * all except the last are chained. + */ + + do { - if (need_sort_for_grouping) + List *groupClause = linitial(rollup_groupclauses); + List *gsets = rollup_lists ? linitial(rollup_lists) : NIL; + int *refmap = refmaps ? linitial(refmaps) : NULL; + AttrNumber *new_grpColIdx = groupColIdx; + ListCell *lc; + int i; + AggStrategy aggstrategy = AGG_CHAINED; + + if (groupClause) { - result_plan = (Plan *) - make_sort_from_groupcols(root, - parse->groupClause, - groupColIdx, - result_plan); - current_pathkeys = root->group_pathkeys; + /* need to remap groupColIdx */ + + if (gsets) + { + Assert(refmap); + + new_grpColIdx = palloc0(sizeof(AttrNumber) * list_length(linitial(gsets))); + + i = 0; + foreach(lc, parse->groupClause) + { + int j = refmap[((SortGroupClause *)lfirst(lc))->tleSortGroupRef]; + if (j > 0) + new_grpColIdx[j - 1] = groupColIdx[i]; + ++i; + } + } + + if (need_sort_for_grouping) + { + result_plan = (Plan *) + make_sort_from_groupcols(root, + groupClause, + new_grpColIdx, + result_plan); + } + else + need_sort_for_grouping = true; + + if (list_length(rollup_groupclauses) == 1) + { + aggstrategy = AGG_SORTED; + if (!is_chained) + current_pathkeys = root->group_pathkeys; + } + else + current_pathkeys = NIL; + } + else + { + aggstrategy = AGG_PLAIN; + current_pathkeys = NIL; } - aggstrategy = AGG_SORTED; - /* - * The AGG node will not change the sort ordering of its - * groups, so current_pathkeys describes the result too. - */ - } - else - { - aggstrategy = AGG_PLAIN; - /* Result will have no sort order */ - current_pathkeys = NIL; + result_plan = (Plan *) make_agg(root, + tlist, + (List *) parse->havingQual, + aggstrategy, + &agg_costs, + gsets ? list_length(linitial(gsets)) : numGroupCols, + new_grpColIdx, + extract_grouping_ops(groupClause), + gsets, + is_chained && (aggstrategy != AGG_CHAINED), + numGroups, + result_plan); + + is_chained = true; + + if (refmap) + pfree(refmap); + if (rollup_lists) + rollup_lists = list_delete_first(rollup_lists); + if (refmaps) + refmaps = list_delete_first(refmaps); + + rollup_groupclauses = list_delete_first(rollup_groupclauses); } - - result_plan = (Plan *) make_agg(root, - tlist, - (List *) parse->havingQual, - aggstrategy, - &agg_costs, - numGroupCols, - groupColIdx, - extract_grouping_ops(parse->groupClause), - parse->groupingSets, - numGroups, - result_plan); + while (rollup_groupclauses); } else if (parse->groupClause) { @@ -2034,6 +2099,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction) result_plan->targetlist), extract_grouping_ops(parse->distinctClause), NIL, + false, numDistinctRows, result_plan); /* Hashed aggregation produces randomly-ordered results */ @@ -2755,64 +2821,394 @@ preprocess_groupclause(PlannerInfo *root, List *force) /* - * Extract a list of grouping sets that can be implemented using a single - * rollup-type aggregate pass. The order of elements in each returned set is - * modified to ensure proper prefix relationships; the sets are returned in - * decreasing order of size. (The input must also be in descending order of - * size.) + * We want to produce the absolute minimum possible number of lists here to + * avoid excess sorts. Fortunately, there is an algorithm for this; the problem + * of finding the minimal partition of a poset into chains (which is what we + * need, taking the list of grouping sets as a poset ordered by set inclusion) + * can be mapped to the problem of finding the maximum cardinality matching on + * a bipartite graph, which is solvable in polynomial time with a worst case of + * no worse than O(n^2.5) and usually much better. Since our N is at most 4096, + * we don't need to consider fallbacks to heuristic or approximate methods. + * (Planning time for a 12-d cube is under half a second on my modest system + * even with optimization off and assertions on.) * - * If we're passed in a sortclause, we follow its order of columns to the - * extent possible, to minimize the chance that we add unnecessary sorts. + * We use the Hopcroft-Karp algorithm for the graph matching; it seems to work + * well enough for our purposes. + * + * This implementation uses the same indices for elements of U and V (the two + * halves of the graph) because in our case they are always the same size, and + * we always know whether an index represents a u or a v. Index 0 is reserved + * for the NIL node. + */ + +struct hk_state +{ + int graph_size; /* size of half the graph plus NIL node */ + int matching; + short **adjacency; /* adjacency[u] = [n, v1,v2,v3,...,vn] */ + short *pair_uv; /* pair_uv[u] -> v */ + short *pair_vu; /* pair_vu[v] -> u */ + float *distance; /* distance[u], float so we can have +inf */ + short *queue; /* queue storage for breadth search */ +}; + +static bool +hk_breadth_search(struct hk_state *state) +{ + int gsize = state->graph_size; + short *queue = state->queue; + float *distance = state->distance; + int qhead = 0; /* we never enqueue any node more than once */ + int qtail = 0; /* so don't have to worry about wrapping */ + int u; + + distance[0] = INFINITY; + + for (u = 1; u < gsize; ++u) + { + if (state->pair_uv[u] == 0) + { + distance[u] = 0; + queue[qhead++] = u; + } + else + distance[u] = INFINITY; + } + + while (qtail < qhead) + { + u = queue[qtail++]; + + if (distance[u] < distance[0]) + { + short *u_adj = state->adjacency[u]; + int i = u_adj ? u_adj[0] : 0; + + for (; i > 0; --i) + { + int u_next = state->pair_vu[u_adj[i]]; + + if (isinf(distance[u_next])) + { + distance[u_next] = 1 + distance[u]; + queue[qhead++] = u_next; + Assert(qhead <= gsize+1); + } + } + } + } + + return !isinf(distance[0]); +} + +static bool +hk_depth_search(struct hk_state *state, int u, int depth) +{ + float *distance = state->distance; + short *pair_uv = state->pair_uv; + short *pair_vu = state->pair_vu; + short *u_adj = state->adjacency[u]; + int i = u_adj ? u_adj[0] : 0; + + if (u == 0) + return true; + + if ((depth % 8) == 0) + check_stack_depth(); + + for (; i > 0; --i) + { + int v = u_adj[i]; + + if (distance[pair_vu[v]] == distance[u] + 1) + { + if (hk_depth_search(state, pair_vu[v], depth+1)) + { + pair_vu[v] = u; + pair_uv[u] = v; + return true; + } + } + } + + distance[u] = INFINITY; + return false; +} + +static struct hk_state * +hk_match(int graph_size, short **adjacency) +{ + struct hk_state *state = palloc(sizeof(struct hk_state)); + + state->graph_size = graph_size; + state->matching = 0; + state->adjacency = adjacency; + state->pair_uv = palloc0(graph_size * sizeof(short)); + state->pair_vu = palloc0(graph_size * sizeof(short)); + state->distance = palloc(graph_size * sizeof(float)); + state->queue = palloc((graph_size + 2) * sizeof(short)); + + while (hk_breadth_search(state)) + { + int u; + + for (u = 1; u < graph_size; ++u) + if (state->pair_uv[u] == 0) + if (hk_depth_search(state, u, 1)) + state->matching++; + + CHECK_FOR_INTERRUPTS(); /* just in case */ + } + + return state; +} + +static void +hk_free(struct hk_state *state) +{ + /* adjacency matrix is treated as owned by the caller */ + pfree(state->pair_uv); + pfree(state->pair_vu); + pfree(state->distance); + pfree(state->queue); + pfree(state); +} + +/* + * Extract lists of grouping sets that can be implemented using a single + * rollup-type aggregate pass each. Returns a list of lists of grouping sets. * - * Sets that can't be accomodated within a rollup that includes the first - * (and therefore largest) grouping set in the input are added to the - * remainder list. + * Input must be sorted with smallest sets first. Result has each sublist + * sorted with smallest sets first. */ static List * -extract_rollup_sets(List *groupingSets, List *sortclause, List **remainder) +extract_rollup_sets(List *groupingSets) { - ListCell *lc; - ListCell *lc2; - List *previous = linitial(groupingSets); - List *tmp_result = list_make1(previous); + int num_sets_raw = list_length(groupingSets); + int num_empty = 0; + int num_sets = 0; /* distinct sets */ + int num_chains = 0; List *result = NIL; + List **results; + List **orig_sets; + Bitmapset **set_masks; + int *chains; + short **adjacency; + short *adjacency_buf; + struct hk_state *state; + int i; + int j; + int j_size; + ListCell *lc1 = list_head(groupingSets); + ListCell *lc; - for_each_cell(lc, lnext(list_head(groupingSets))) + /* + * Start by stripping out empty sets. The algorithm doesn't require this, + * but the planner currently needs all empty sets to be returned in the + * first list, so we strip them here and add them back after. + */ + + while (lc1 && lfirst(lc1) == NIL) { - List *candidate = lfirst(lc); - bool ok = true; + ++num_empty; + lc1 = lnext(lc1); + } + + /* bail out now if it turns out that all we had were empty sets. */ + + if (!lc1) + return list_make1(groupingSets); + + /* + * We don't strictly need to remove duplicate sets here, but if we + * don't, they tend to become scattered through the result, which is + * a bit confusing (and irritating if we ever decide to optimize them + * out). So we remove them here and add them back after. + * + * For each non-duplicate set, we fill in the following: + * + * orig_sets[i] = list of the original set lists + * set_masks[i] = bitmapset for testing inclusion + * adjacency[i] = array [n, v1, v2, ... vn] of adjacency indices + * + * chains[i] will be the result group this set is assigned to. + * + * We index all of these from 1 rather than 0 because it is convenient + * to leave 0 free for the NIL node in the graph algorithm. + */ + + orig_sets = palloc0((num_sets_raw + 1) * sizeof(List*)); + set_masks = palloc0((num_sets_raw + 1) * sizeof(Bitmapset *)); + adjacency = palloc0((num_sets_raw + 1) * sizeof(short *)); + adjacency_buf = palloc((num_sets_raw + 1) * sizeof(short)); + + j_size = 0; + j = 0; + i = 1; + + for_each_cell(lc, lc1) + { + List *candidate = lfirst(lc); + Bitmapset *candidate_set = NULL; + ListCell *lc2; + int dup_of = 0; foreach(lc2, candidate) { - int ref = lfirst_int(lc2); - if (!list_member_int(previous, ref)) + candidate_set = bms_add_member(candidate_set, lfirst_int(lc2)); + } + + /* we can only be a dup if we're the same length as a previous set */ + if (j_size == list_length(candidate)) + { + int k; + for (k = j; k < i; ++k) { - ok = false; - break; + if (bms_equal(set_masks[k], candidate_set)) + { + dup_of = k; + break; + } } } + else if (j_size < list_length(candidate)) + { + j_size = list_length(candidate); + j = i; + } - if (ok) + if (dup_of > 0) + { + orig_sets[dup_of] = lappend(orig_sets[dup_of], candidate); + bms_free(candidate_set); + } + else { - tmp_result = lcons(candidate, tmp_result); - previous = candidate; + int k; + int n_adj = 0; + + orig_sets[i] = list_make1(candidate); + set_masks[i] = candidate_set; + + /* fill in adjacency list; no need to compare equal-size sets */ + + for (k = j - 1; k > 0; --k) + { + if (bms_is_subset(set_masks[k], candidate_set)) + adjacency_buf[++n_adj] = k; + } + + if (n_adj > 0) + { + adjacency_buf[0] = n_adj; + adjacency[i] = palloc((n_adj + 1) * sizeof(short)); + memcpy(adjacency[i], adjacency_buf, (n_adj + 1) * sizeof(short)); + } + else + adjacency[i] = NULL; + + ++i; } + } + + num_sets = i - 1; + + /* + * Apply the matching algorithm to do the work. + */ + + state = hk_match(num_sets + 1, adjacency); + + /* + * Now, the state->pair* fields have the info we need to assign sets to + * chains. Two sets (u,v) belong to the same chain if pair_uv[u] = v or + * pair_vu[v] = u (both will be true, but we check both so that we can do + * it in one pass) + */ + + chains = palloc0((num_sets + 1) * sizeof(int)); + + for (i = 1; i <= num_sets; ++i) + { + int u = state->pair_vu[i]; + int v = state->pair_uv[i]; + + if (u > 0 && u < i) + chains[i] = chains[u]; + else if (v > 0 && v < i) + chains[i] = chains[v]; else - *remainder = lappend(*remainder, candidate); + chains[i] = ++num_chains; } + /* build result lists. */ + + results = palloc0((num_chains + 1) * sizeof(List*)); + + for (i = 1; i <= num_sets; ++i) + { + int c = chains[i]; + + Assert(c > 0); + + results[c] = list_concat(results[c], orig_sets[i]); + } + + /* push any empty sets back on the first list. */ + + while (num_empty-- > 0) + results[1] = lcons(NIL, results[1]); + + /* make result list */ + + for (i = 1; i <= num_chains; ++i) + result = lappend(result, results[i]); + /* - * reorder the list elements so that shorter sets are strict - * prefixes of longer ones, and if we ever have a choice, try - * and follow the sortclause if there is one. (We're trying - * here to ensure that GROUPING SETS ((a,b),(b)) ORDER BY b,a - * gets implemented in one pass.) + * Free all the things. + * + * (This is over-fussy for small sets but for large sets we could have tied + * up a nontrivial amount of memory.) */ - previous = NIL; + hk_free(state); + pfree(results); + pfree(chains); + for (i = 1; i <= num_sets; ++i) + if (adjacency[i]) + pfree(adjacency[i]); + pfree(adjacency); + pfree(adjacency_buf); + pfree(orig_sets); + for (i = 1; i <= num_sets; ++i) + bms_free(set_masks[i]); + pfree(set_masks); + + return result; +} + +/* + * Reorder the elements of a list of grouping sets such that they have correct + * prefix relationships. + * + * The input must be ordered with smallest sets first; the result is returned + * with largest sets first. + * + * If we're passed in a sortclause, we follow its order of columns to the + * extent possible, to minimize the chance that we add unnecessary sorts. + * (We're trying here to ensure that GROUPING SETS ((a,b,c),(c)) ORDER BY c,b,a + * gets implemented in one pass.) + */ +static List * +reorder_grouping_sets(List *groupingsets, List *sortclause) +{ + ListCell *lc; + ListCell *lc2; + List *previous = NIL; + List *result = NIL; - foreach(lc, tmp_result) + foreach(lc, groupingsets) { List *candidate = lfirst(lc); List *new_elems = list_difference_int(candidate, previous); @@ -2830,6 +3226,7 @@ extract_rollup_sets(List *groupingSets, List *sortclause, List **remainder) } else { + /* diverged from the sortclause; give up on it */ sortclause = NIL; break; } @@ -2846,7 +3243,6 @@ extract_rollup_sets(List *groupingSets, List *sortclause, List **remainder) } list_free(previous); - list_free(tmp_result); return result; } @@ -2867,11 +3263,11 @@ standard_qp_callback(PlannerInfo *root, void *extra) * sortClause is certainly sort-able, but GROUP BY and DISTINCT might not * be, in which case we just leave their pathkeys empty. */ - if (parse->groupClause && - grouping_is_sortable(parse->groupClause)) + if (qp_extra->groupClause && + grouping_is_sortable(qp_extra->groupClause)) root->group_pathkeys = make_pathkeys_for_sortclauses(root, - parse->groupClause, + qp_extra->groupClause, tlist); else root->group_pathkeys = NIL; diff --git a/src/backend/optimizer/plan/setrefs.c b/src/backend/optimizer/plan/setrefs.c index b1016c6..2d91406 100644 --- a/src/backend/optimizer/plan/setrefs.c +++ b/src/backend/optimizer/plan/setrefs.c @@ -655,8 +655,16 @@ set_plan_refs(PlannerInfo *root, Plan *plan, int rtoffset) } break; case T_Agg: - set_upper_references(root, plan, rtoffset); - set_group_vars(root, (Agg *) plan); + if (((Agg *) plan)->aggstrategy == AGG_CHAINED) + { + /* chained agg does not evaluate tlist */ + set_dummy_tlist_references(plan, rtoffset); + } + else + { + set_upper_references(root, plan, rtoffset); + set_group_vars(root, (Agg *) plan); + } break; case T_Group: set_upper_references(root, plan, rtoffset); @@ -1291,21 +1299,30 @@ fix_scan_expr_walker(Node *node, fix_scan_expr_context *context) * Modify any Var references in the target list of a non-trivial * (i.e. contains grouping sets) Agg node to use GroupedVar instead, * which will conditionally replace them with nulls at runtime. + * Also fill in the cols list of any GROUPING() node. */ static void set_group_vars(PlannerInfo *root, Agg *agg) { set_group_vars_context context; - int i; - Bitmapset *cols = NULL; + AttrNumber *groupColIdx = root->groupColIdx; + int numCols = list_length(root->parse->groupClause); + int i; + Bitmapset *cols = NULL; if (!agg->groupingSets) return; + if (!groupColIdx) + { + Assert(numCols == agg->numCols); + groupColIdx = agg->grpColIdx; + } + context.root = root; - for (i = 0; i < agg->numCols; ++i) - cols = bms_add_member(cols, agg->grpColIdx[i]); + for (i = 0; i < numCols; ++i) + cols = bms_add_member(cols, groupColIdx[i]); context.groupedcols = cols; diff --git a/src/backend/optimizer/plan/subselect.c b/src/backend/optimizer/plan/subselect.c index e0a2ca7..e5befe3 100644 --- a/src/backend/optimizer/plan/subselect.c +++ b/src/backend/optimizer/plan/subselect.c @@ -79,7 +79,8 @@ static Node *process_sublinks_mutator(Node *node, static Bitmapset *finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params, - Bitmapset *scan_params); + Bitmapset *scan_params, + Agg *agg_chain_head); static bool finalize_primnode(Node *node, finalize_primnode_context *context); @@ -2091,7 +2092,7 @@ SS_finalize_plan(PlannerInfo *root, Plan *plan, bool attach_initplans) /* * Now recurse through plan tree. */ - (void) finalize_plan(root, plan, valid_params, NULL); + (void) finalize_plan(root, plan, valid_params, NULL, NULL); bms_free(valid_params); @@ -2142,7 +2143,7 @@ SS_finalize_plan(PlannerInfo *root, Plan *plan, bool attach_initplans) */ static Bitmapset * finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params, - Bitmapset *scan_params) + Bitmapset *scan_params, Agg *agg_chain_head) { finalize_primnode_context context; int locally_added_param; @@ -2351,7 +2352,8 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params, finalize_plan(root, (Plan *) lfirst(l), valid_params, - scan_params)); + scan_params, + NULL)); } } break; @@ -2367,7 +2369,8 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params, finalize_plan(root, (Plan *) lfirst(l), valid_params, - scan_params)); + scan_params, + NULL)); } } break; @@ -2383,7 +2386,8 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params, finalize_plan(root, (Plan *) lfirst(l), valid_params, - scan_params)); + scan_params, + NULL)); } } break; @@ -2399,7 +2403,8 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params, finalize_plan(root, (Plan *) lfirst(l), valid_params, - scan_params)); + scan_params, + NULL)); } } break; @@ -2415,7 +2420,8 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params, finalize_plan(root, (Plan *) lfirst(l), valid_params, - scan_params)); + scan_params, + NULL)); } } break; @@ -2482,8 +2488,30 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params, &context); break; - case T_Hash: case T_Agg: + { + Agg *agg = (Agg *) plan; + + if (agg->aggstrategy == AGG_CHAINED) + { + Assert(agg_chain_head); + + /* + * our real tlist and qual are the ones in the chain head, + * not the local ones which are dummy for passthrough. + * Fortunately we can call finalize_primnode more than + * once. + */ + + finalize_primnode((Node *) agg_chain_head->plan.targetlist, &context); + finalize_primnode((Node *) agg_chain_head->plan.qual, &context); + } + else if (agg->chain_head) + agg_chain_head = agg; + } + break; + + case T_Hash: case T_Material: case T_Sort: case T_Unique: @@ -2500,7 +2528,8 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params, child_params = finalize_plan(root, plan->lefttree, valid_params, - scan_params); + scan_params, + agg_chain_head); context.paramids = bms_add_members(context.paramids, child_params); if (nestloop_params) @@ -2509,7 +2538,8 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params, child_params = finalize_plan(root, plan->righttree, bms_union(nestloop_params, valid_params), - scan_params); + scan_params, + agg_chain_head); /* ... and they don't count as parameters used at my level */ child_params = bms_difference(child_params, nestloop_params); bms_free(nestloop_params); @@ -2520,7 +2550,8 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params, child_params = finalize_plan(root, plan->righttree, valid_params, - scan_params); + scan_params, + agg_chain_head); } context.paramids = bms_add_members(context.paramids, child_params); diff --git a/src/backend/optimizer/prep/prepunion.c b/src/backend/optimizer/prep/prepunion.c index 3c71d7f..ce35226 100644 --- a/src/backend/optimizer/prep/prepunion.c +++ b/src/backend/optimizer/prep/prepunion.c @@ -774,6 +774,7 @@ make_union_unique(SetOperationStmt *op, Plan *plan, plan->targetlist), extract_grouping_ops(groupList), NIL, + false, numGroups, plan); /* Hashed aggregation produces randomly-ordered results */ diff --git a/src/backend/parser/parse_agg.c b/src/backend/parser/parse_agg.c index 02f849b..86063d8 100644 --- a/src/backend/parser/parse_agg.c +++ b/src/backend/parser/parse_agg.c @@ -965,11 +965,11 @@ parseCheckAggregates(ParseState *pstate, Query *qry) * The intersection will often be empty, so help things along by * seeding the intersect with the smallest set. */ - gset_common = llast(gsets); + gset_common = linitial(gsets); if (gset_common) { - foreach(l, gsets) + for_each_cell(l, lnext(list_head(gsets))) { gset_common = list_intersection_int(gset_common, lfirst(l)); if (!gset_common) @@ -1620,16 +1620,16 @@ expand_groupingset_node(GroupingSet *gs) } static int -cmp_list_len_desc(const void *a, const void *b) +cmp_list_len_asc(const void *a, const void *b) { int la = list_length(*(List*const*)a); int lb = list_length(*(List*const*)b); - return (la > lb) ? -1 : (la == lb) ? 0 : 1; + return (la > lb) ? 1 : (la == lb) ? 0 : -1; } /* * Expand a groupingSets clause to a flat list of grouping sets. - * The returned list is sorted by length, longest sets first. + * The returned list is sorted by length, shortest sets first. * * This is mainly for the planner, but we use it here too to do * some consistency checks. @@ -1705,7 +1705,7 @@ expand_grouping_sets(List *groupingSets, int limit) *ptr++ = lfirst(lc); } - qsort(buf, result_len, sizeof(List*), cmp_list_len_desc); + qsort(buf, result_len, sizeof(List*), cmp_list_len_asc); result = NIL; ptr = buf; diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index ee1fe74..cbc7b0c 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -409,6 +409,11 @@ typedef struct EState HeapTuple *es_epqTuple; /* array of EPQ substitute tuples */ bool *es_epqTupleSet; /* true if EPQ tuple is provided */ bool *es_epqScanDone; /* true if EPQ tuple has been fetched */ + + /* + * This is for linking chained aggregate nodes + */ + struct AggState *agg_chain_head; } EState; @@ -1729,6 +1734,7 @@ typedef struct AggState AggStatePerAgg curperagg; /* identifies currently active aggregate */ bool input_done; /* indicates end of input */ bool agg_done; /* indicates completion of Agg scan */ + bool chain_done; /* indicates completion of chained fetch */ int projected_set; /* The last projected grouping set */ int current_set; /* The current grouping set being evaluated */ Bitmapset **grouped_cols; /* column groupings for rollup */ @@ -1742,6 +1748,10 @@ typedef struct AggState List *hash_needed; /* list of columns needed in hash table */ bool table_filled; /* hash table filled yet? */ TupleHashIterator hashiter; /* for iterating through hash table */ + int chain_depth; /* number of chained child nodes */ + int chain_rescan; /* rescan indicator */ + struct AggState *chain_head; + Tuplestorestate *chain_tuplestore; } AggState; /* ---------------- diff --git a/src/include/nodes/plannodes.h b/src/include/nodes/plannodes.h index 28173ab..b006a30 100644 --- a/src/include/nodes/plannodes.h +++ b/src/include/nodes/plannodes.h @@ -623,6 +623,7 @@ typedef enum AggStrategy { AGG_PLAIN, /* simple agg across all input rows */ AGG_SORTED, /* grouped agg, input must be sorted */ + AGG_CHAINED, /* chained agg, input must be sorted */ AGG_HASHED /* grouped agg, use internal hashtable */ } AggStrategy; @@ -630,6 +631,7 @@ typedef struct Agg { Plan plan; AggStrategy aggstrategy; + bool chain_head; int numCols; /* number of grouping columns */ AttrNumber *grpColIdx; /* their indexes in the target list */ Oid *grpOperators; /* equality operators to compare with */ diff --git a/src/include/optimizer/planmain.h b/src/include/optimizer/planmain.h index c4c0004..58d88bc 100644 --- a/src/include/optimizer/planmain.h +++ b/src/include/optimizer/planmain.h @@ -59,6 +59,7 @@ extern Agg *make_agg(PlannerInfo *root, List *tlist, List *qual, AggStrategy aggstrategy, const AggClauseCosts *aggcosts, int numGroupCols, AttrNumber *grpColIdx, Oid *grpOperators, List *groupingSets, + bool chain_head, long numGroups, Plan *lefttree); extern WindowAgg *make_windowagg(PlannerInfo *root, List *tlist, diff --git a/src/test/regress/expected/groupingsets.out b/src/test/regress/expected/groupingsets.out index 2d121c7..e5d6c78 100644 --- a/src/test/regress/expected/groupingsets.out +++ b/src/test/regress/expected/groupingsets.out @@ -281,6 +281,29 @@ select(select (select grouping(c) from (values (1)) v2(c) GROUP BY c) from (valu (3 rows) -- Combinations of operations +select a, b, c, d from gstest2 group by rollup(a,b),grouping sets(c,d); + a | b | c | d +---+---+---+--- + 1 | 1 | 1 | + 1 | | 1 | + | | 1 | + 1 | 1 | 2 | + 1 | 2 | 2 | + 1 | | 2 | + 2 | 2 | 2 | + 2 | | 2 | + | | 2 | + 1 | 1 | | 1 + 1 | | | 1 + | | | 1 + 1 | 1 | | 2 + 1 | 2 | | 2 + 1 | | | 2 + 2 | 2 | | 2 + 2 | | | 2 + | | | 2 +(18 rows) + select a, b from (values (1,2),(2,3)) v(a,b) group by a,b, grouping sets(a); a | b ---+--- @@ -288,6 +311,101 @@ select a, b from (values (1,2),(2,3)) v(a,b) group by a,b, grouping sets(a); 2 | 3 (2 rows) +-- Tests for chained aggregates +select a, b, grouping(a,b), sum(v), count(*), max(v) + from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)); + a | b | grouping | sum | count | max +---+---+----------+-----+-------+----- + 1 | 1 | 0 | 21 | 2 | 11 + 1 | 2 | 0 | 25 | 2 | 13 + 1 | 3 | 0 | 14 | 1 | 14 + 2 | 3 | 0 | 15 | 1 | 15 + 3 | 3 | 0 | 16 | 1 | 16 + 3 | 4 | 0 | 17 | 1 | 17 + 4 | 1 | 0 | 37 | 2 | 19 + | | 3 | 21 | 2 | 11 + | | 3 | 25 | 2 | 13 + | | 3 | 14 | 1 | 14 + | | 3 | 15 | 1 | 15 + | | 3 | 16 | 1 | 16 + | | 3 | 17 | 1 | 17 + | | 3 | 37 | 2 | 19 + | | 3 | 21 | 2 | 11 + | | 3 | 25 | 2 | 13 + | | 3 | 14 | 1 | 14 + | | 3 | 15 | 1 | 15 + | | 3 | 16 | 1 | 16 + | | 3 | 17 | 1 | 17 + | | 3 | 37 | 2 | 19 +(21 rows) + +select(select (select grouping(a,b) from (values (1)) v2(c)) from (values (1,2)) v1(a,b) group by (a,b)) from (values(6,7)) v3(e,f) GROUP BY ROLLUP((e+1),(f+1)); + grouping +---------- + 0 + 0 + 0 +(3 rows) + +select(select (select grouping(a,b) from (values (1)) v2(c)) from (values (1,2)) v1(a,b) group by (a,b)) from (values(6,7)) v3(e,f) GROUP BY CUBE((e+1),(f+1)) ORDER BY (e+1),(f+1); + grouping +---------- + 0 + 0 + 0 + 0 +(4 rows) + +select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum + from gstest2 group by cube (a,b) order by rsum, a, b; + a | b | sum | rsum +---+---+-----+------ + 1 | 1 | 8 | 8 + 1 | 2 | 2 | 10 + 1 | | 10 | 20 + 2 | 2 | 2 | 22 + 2 | | 2 | 24 + | 1 | 8 | 32 + | 2 | 4 | 36 + | | 12 | 48 +(8 rows) + +select a, b, sum(c) from (values (1,1,10),(1,1,11),(1,2,12),(1,2,13),(1,3,14),(2,3,15),(3,3,16),(3,4,17),(4,1,18),(4,1,19)) v(a,b,c) group by rollup (a,b); + a | b | sum +---+---+----- + 1 | 1 | 21 + 1 | 2 | 25 + 1 | 3 | 14 + 1 | | 60 + 2 | 3 | 15 + 2 | | 15 + 3 | 3 | 16 + 3 | 4 | 17 + 3 | | 33 + 4 | 1 | 37 + 4 | | 37 + | | 145 +(12 rows) + +select a, b, sum(v.x) + from (values (1),(2)) v(x), gstest_data(v.x) + group by cube (a,b) order by a,b; + a | b | sum +---+---+----- + 1 | 1 | 1 + 1 | 2 | 1 + 1 | 3 | 1 + 1 | | 3 + 2 | 1 | 2 + 2 | 2 | 2 + 2 | 3 | 2 + 2 | | 6 + | 1 | 3 + | 2 | 3 + | 3 | 3 + | | 9 +(12 rows) + -- Agg level check. This query should error out. select (select grouping(a,b) from gstest2) from gstest2 group by a,b; ERROR: Arguments to GROUPING must be grouping expressions of the associated query level @@ -358,4 +476,87 @@ group by rollup(ten); | (11 rows) +-- More rescan tests +select * from (values (1),(2)) v(a) left join lateral (select v.a, four, ten, count(*) from onek group by cube(four,ten)) s on true order by v.a,four,ten; + a | a | four | ten | count +---+---+------+-----+------- + 1 | 1 | 0 | 0 | 50 + 1 | 1 | 0 | 2 | 50 + 1 | 1 | 0 | 4 | 50 + 1 | 1 | 0 | 6 | 50 + 1 | 1 | 0 | 8 | 50 + 1 | 1 | 0 | | 250 + 1 | 1 | 1 | 1 | 50 + 1 | 1 | 1 | 3 | 50 + 1 | 1 | 1 | 5 | 50 + 1 | 1 | 1 | 7 | 50 + 1 | 1 | 1 | 9 | 50 + 1 | 1 | 1 | | 250 + 1 | 1 | 2 | 0 | 50 + 1 | 1 | 2 | 2 | 50 + 1 | 1 | 2 | 4 | 50 + 1 | 1 | 2 | 6 | 50 + 1 | 1 | 2 | 8 | 50 + 1 | 1 | 2 | | 250 + 1 | 1 | 3 | 1 | 50 + 1 | 1 | 3 | 3 | 50 + 1 | 1 | 3 | 5 | 50 + 1 | 1 | 3 | 7 | 50 + 1 | 1 | 3 | 9 | 50 + 1 | 1 | 3 | | 250 + 1 | 1 | | 0 | 100 + 1 | 1 | | 1 | 100 + 1 | 1 | | 2 | 100 + 1 | 1 | | 3 | 100 + 1 | 1 | | 4 | 100 + 1 | 1 | | 5 | 100 + 1 | 1 | | 6 | 100 + 1 | 1 | | 7 | 100 + 1 | 1 | | 8 | 100 + 1 | 1 | | 9 | 100 + 1 | 1 | | | 1000 + 2 | 2 | 0 | 0 | 50 + 2 | 2 | 0 | 2 | 50 + 2 | 2 | 0 | 4 | 50 + 2 | 2 | 0 | 6 | 50 + 2 | 2 | 0 | 8 | 50 + 2 | 2 | 0 | | 250 + 2 | 2 | 1 | 1 | 50 + 2 | 2 | 1 | 3 | 50 + 2 | 2 | 1 | 5 | 50 + 2 | 2 | 1 | 7 | 50 + 2 | 2 | 1 | 9 | 50 + 2 | 2 | 1 | | 250 + 2 | 2 | 2 | 0 | 50 + 2 | 2 | 2 | 2 | 50 + 2 | 2 | 2 | 4 | 50 + 2 | 2 | 2 | 6 | 50 + 2 | 2 | 2 | 8 | 50 + 2 | 2 | 2 | | 250 + 2 | 2 | 3 | 1 | 50 + 2 | 2 | 3 | 3 | 50 + 2 | 2 | 3 | 5 | 50 + 2 | 2 | 3 | 7 | 50 + 2 | 2 | 3 | 9 | 50 + 2 | 2 | 3 | | 250 + 2 | 2 | | 0 | 100 + 2 | 2 | | 1 | 100 + 2 | 2 | | 2 | 100 + 2 | 2 | | 3 | 100 + 2 | 2 | | 4 | 100 + 2 | 2 | | 5 | 100 + 2 | 2 | | 6 | 100 + 2 | 2 | | 7 | 100 + 2 | 2 | | 8 | 100 + 2 | 2 | | 9 | 100 + 2 | 2 | | | 1000 +(70 rows) + +select array(select row(v.a,s1.*) from (select two,four, count(*) from onek group by cube(two,four) order by two,four) s1) from (values (1),(2)) v(a); + array +------------------------------------------------------------------------------------------------------------------------------------------------------ + {"(1,0,0,250)","(1,0,2,250)","(1,0,,500)","(1,1,1,250)","(1,1,3,250)","(1,1,,500)","(1,,0,250)","(1,,1,250)","(1,,2,250)","(1,,3,250)","(1,,,1000)"} + {"(2,0,0,250)","(2,0,2,250)","(2,0,,500)","(2,1,1,250)","(2,1,3,250)","(2,1,,500)","(2,,0,250)","(2,,1,250)","(2,,2,250)","(2,,3,250)","(2,,,1000)"} +(2 rows) + -- end diff --git a/src/test/regress/sql/groupingsets.sql b/src/test/regress/sql/groupingsets.sql index bc571ff..5f32c4a 100644 --- a/src/test/regress/sql/groupingsets.sql +++ b/src/test/regress/sql/groupingsets.sql @@ -108,8 +108,22 @@ select(select (select grouping(e,f) from (values (1)) v2(c)) from (values (1,2)) select(select (select grouping(c) from (values (1)) v2(c) GROUP BY c) from (values (1,2)) v1(a,b) group by (a,b)) from (values(6,7)) v3(e,f) GROUP BY ROLLUP(e,f); -- Combinations of operations +select a, b, c, d from gstest2 group by rollup(a,b),grouping sets(c,d); select a, b from (values (1,2),(2,3)) v(a,b) group by a,b, grouping sets(a); +-- Tests for chained aggregates +select a, b, grouping(a,b), sum(v), count(*), max(v) + from gstest1 group by grouping sets ((a,b),(a+1,b+1),(a+2,b+2)); +select(select (select grouping(a,b) from (values (1)) v2(c)) from (values (1,2)) v1(a,b) group by (a,b)) from (values(6,7)) v3(e,f) GROUP BY ROLLUP((e+1),(f+1)); +select(select (select grouping(a,b) from (values (1)) v2(c)) from (values (1,2)) v1(a,b) group by (a,b)) from (values(6,7)) v3(e,f) GROUP BY CUBE((e+1),(f+1)) ORDER BY (e+1),(f+1); +select a, b, sum(c), sum(sum(c)) over (order by a,b) as rsum + from gstest2 group by cube (a,b) order by rsum, a, b; +select a, b, sum(c) from (values (1,1,10),(1,1,11),(1,2,12),(1,2,13),(1,3,14),(2,3,15),(3,3,16),(3,4,17),(4,1,18),(4,1,19)) v(a,b,c) group by rollup (a,b); +select a, b, sum(v.x) + from (values (1),(2)) v(x), gstest_data(v.x) + group by cube (a,b) order by a,b; + + -- Agg level check. This query should error out. select (select grouping(a,b) from gstest2) from gstest2 group by a,b; @@ -125,4 +139,8 @@ having exists (select 1 from onek b where sum(distinct a.four) = b.four); select ten, sum(distinct four) filter (where four::text ~ '123') from onek a group by rollup(ten); +-- More rescan tests +select * from (values (1),(2)) v(a) left join lateral (select v.a, four, ten, count(*) from onek group by cube(four,ten)) s on true order by v.a,four,ten; +select array(select row(v.a,s1.*) from (select two,four, count(*) from onek group by cube(two,four) order by two,four) s1) from (values (1),(2)) v(a); + -- end