From 2c16e67f46f418239ab90a51611f168508bac66e Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Sun, 15 Jan 2017 19:23:22 -0800 Subject: [PATCH 1/2] Put SRF into a separate node v1. Author: Tom Lane Discussion: https://postgr.es/m/557.1473895705@sss.pgh.pa.us --- src/backend/nodes/outfuncs.c | 1 + src/backend/optimizer/plan/createplan.c | 33 ++++- src/backend/optimizer/plan/planner.c | 219 +++++++++++++++++++++++++------ src/backend/optimizer/util/clauses.c | 104 ++------------- src/backend/optimizer/util/pathnode.c | 75 +++++++++++ src/backend/optimizer/util/tlist.c | 199 ++++++++++++++++++++++++++++ src/include/nodes/relation.h | 1 + src/include/optimizer/clauses.h | 1 - src/include/optimizer/pathnode.h | 4 + src/include/optimizer/tlist.h | 3 + src/test/regress/expected/aggregates.out | 3 +- src/test/regress/expected/limit.out | 10 +- src/test/regress/expected/rangefuncs.out | 10 +- src/test/regress/expected/subselect.out | 26 ++-- src/test/regress/expected/tsrf.out | 11 +- 15 files changed, 544 insertions(+), 156 deletions(-) diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c index cf0a6059e9..73fdc9706d 100644 --- a/src/backend/nodes/outfuncs.c +++ b/src/backend/nodes/outfuncs.c @@ -1805,6 +1805,7 @@ _outProjectionPath(StringInfo str, const ProjectionPath *node) WRITE_NODE_FIELD(subpath); WRITE_BOOL_FIELD(dummypp); + WRITE_BOOL_FIELD(srfpp); } static void diff --git a/src/backend/optimizer/plan/createplan.c b/src/backend/optimizer/plan/createplan.c index c7bcd9b84c..875de739a8 100644 --- a/src/backend/optimizer/plan/createplan.c +++ b/src/backend/optimizer/plan/createplan.c @@ -1421,8 +1421,21 @@ create_projection_plan(PlannerInfo *root, ProjectionPath *best_path) Plan *subplan; List *tlist; - /* Since we intend to project, we don't need to constrain child tlist */ - subplan = create_plan_recurse(root, best_path->subpath, 0); + /* + * XXX Possibly-temporary hack: if the subpath is a dummy ResultPath, + * don't bother with it, just make a Result with no input. This avoids an + * extra Result plan node when doing "SELECT srf()". Depending on what we + * decide about the desired plan structure for SRF-expanding nodes, this + * optimization might have to go away, and in any case it'll probably look + * a good bit different. + */ + if (IsA(best_path->subpath, ResultPath) && + ((ResultPath *) best_path->subpath)->path.pathtarget->exprs == NIL && + ((ResultPath *) best_path->subpath)->quals == NIL) + subplan = NULL; + else + /* Since we intend to project, we don't need to constrain child tlist */ + subplan = create_plan_recurse(root, best_path->subpath, 0); tlist = build_path_tlist(root, &best_path->path); @@ -1441,8 +1454,9 @@ create_projection_plan(PlannerInfo *root, ProjectionPath *best_path) * creation, but that would add expense to creating Paths we might end up * not using.) */ - if (is_projection_capable_path(best_path->subpath) || - tlist_same_exprs(tlist, subplan->targetlist)) + if (!best_path->srfpp && + (is_projection_capable_path(best_path->subpath) || + tlist_same_exprs(tlist, subplan->targetlist))) { /* Don't need a separate Result, just assign tlist to subplan */ plan = subplan; @@ -6192,6 +6206,17 @@ is_projection_capable_path(Path *path) * projection to its dummy path. */ return IS_DUMMY_PATH(path); + case T_Result: + + /* + * If the path is doing SRF evaluation, claim it can't project, so + * we don't jam a new tlist into it and thereby break the property + * that the SRFs appear at top level. + */ + if (IsA(path, ProjectionPath) && + ((ProjectionPath *) path)->srfpp) + return false; + break; default: break; } diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index f936710171..70870bbbe0 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -153,6 +153,8 @@ static List *make_pathkeys_for_window(PlannerInfo *root, WindowClause *wc, static PathTarget *make_sort_input_target(PlannerInfo *root, PathTarget *final_target, bool *have_postponed_srfs); +static void adjust_paths_for_srfs(PlannerInfo *root, RelOptInfo *rel, + List *targets, List *targets_contain_srfs); /***************************************************************************** @@ -1434,8 +1436,9 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, int64 count_est = 0; double limit_tuples = -1.0; bool have_postponed_srfs = false; - double tlist_rows; PathTarget *final_target; + List *final_targets; + List *final_targets_contain_srfs; RelOptInfo *current_rel; RelOptInfo *final_rel; ListCell *lc; @@ -1498,6 +1501,10 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, /* Also extract the PathTarget form of the setop result tlist */ final_target = current_rel->cheapest_total_path->pathtarget; + /* The setop result tlist couldn't contain any SRFs */ + Assert(!parse->hasTargetSRFs); + final_targets = final_targets_contain_srfs = NIL; + /* * Can't handle FOR [KEY] UPDATE/SHARE here (parser should have * checked already, but let's make sure). @@ -1523,8 +1530,14 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, { /* No set operations, do regular planning */ PathTarget *sort_input_target; + List *sort_input_targets; + List *sort_input_targets_contain_srfs; PathTarget *grouping_target; + List *grouping_targets; + List *grouping_targets_contain_srfs; PathTarget *scanjoin_target; + List *scanjoin_targets; + List *scanjoin_targets_contain_srfs; bool have_grouping; AggClauseCosts agg_costs; WindowFuncLists *wflists = NULL; @@ -1775,8 +1788,50 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, scanjoin_target = grouping_target; /* - * Forcibly apply scan/join target to all the Paths for the scan/join - * rel. + * If there are any SRFs in the targetlist, we must separate each of + * these PathTargets into SRF-computing and SRF-free targets. Replace + * each of the named targets with a SRF-free version, and remember the + * list of additional projection steps we need to add afterwards. + */ + if (parse->hasTargetSRFs) + { + /* final_target doesn't recompute any SRFs in sort_input_target */ + split_pathtarget_at_srfs(root, final_target, sort_input_target, + &final_targets, + &final_targets_contain_srfs); + final_target = (PathTarget *) linitial(final_targets); + Assert(!linitial_int(final_targets_contain_srfs)); + /* likewise for sort_input_target vs. grouping_target */ + split_pathtarget_at_srfs(root, sort_input_target, grouping_target, + &sort_input_targets, + &sort_input_targets_contain_srfs); + sort_input_target = (PathTarget *) linitial(sort_input_targets); + Assert(!linitial_int(sort_input_targets_contain_srfs)); + /* likewise for grouping_target vs. scanjoin_target */ + split_pathtarget_at_srfs(root, grouping_target, scanjoin_target, + &grouping_targets, + &grouping_targets_contain_srfs); + grouping_target = (PathTarget *) linitial(grouping_targets); + Assert(!linitial_int(grouping_targets_contain_srfs)); + /* scanjoin_target will not have any SRFs precomputed for it */ + split_pathtarget_at_srfs(root, scanjoin_target, NULL, + &scanjoin_targets, + &scanjoin_targets_contain_srfs); + scanjoin_target = (PathTarget *) linitial(scanjoin_targets); + Assert(!linitial_int(scanjoin_targets_contain_srfs)); + } + else + { + /* initialize lists, just to keep compiler quiet */ + final_targets = final_targets_contain_srfs = NIL; + sort_input_targets = sort_input_targets_contain_srfs = NIL; + grouping_targets = grouping_targets_contain_srfs = NIL; + scanjoin_targets = scanjoin_targets_contain_srfs = NIL; + } + + /* + * Forcibly apply SRF-free scan/join target to all the Paths for the + * scan/join rel. * * In principle we should re-run set_cheapest() here to identify the * cheapest path, but it seems unlikely that adding the same tlist @@ -1847,6 +1902,12 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, current_rel->partial_pathlist = NIL; } + /* Now fix things up if scan/join target contains SRFs */ + if (parse->hasTargetSRFs) + adjust_paths_for_srfs(root, current_rel, + scanjoin_targets, + scanjoin_targets_contain_srfs); + /* * Save the various upper-rel PathTargets we just computed into * root->upper_targets[]. The core code doesn't use this, but it @@ -1871,6 +1932,11 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, &agg_costs, rollup_lists, rollup_groupclauses); + /* Fix things up if grouping_target contains SRFs */ + if (parse->hasTargetSRFs) + adjust_paths_for_srfs(root, current_rel, + grouping_targets, + grouping_targets_contain_srfs); } /* @@ -1886,6 +1952,11 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, tlist, wflists, activeWindows); + /* Fix things up if sort_input_target contains SRFs */ + if (parse->hasTargetSRFs) + adjust_paths_for_srfs(root, current_rel, + sort_input_targets, + sort_input_targets_contain_srfs); } /* @@ -1914,40 +1985,11 @@ grouping_planner(PlannerInfo *root, bool inheritance_update, final_target, have_postponed_srfs ? -1.0 : limit_tuples); - } - - /* - * If there are set-returning functions in the tlist, scale up the output - * rowcounts of all surviving Paths to account for that. Note that if any - * SRFs appear in sorting or grouping columns, we'll have underestimated - * the numbers of rows passing through earlier steps; but that's such a - * weird usage that it doesn't seem worth greatly complicating matters to - * account for it. - */ - if (parse->hasTargetSRFs) - tlist_rows = tlist_returns_set_rows(tlist); - else - tlist_rows = 1; - - if (tlist_rows > 1) - { - foreach(lc, current_rel->pathlist) - { - Path *path = (Path *) lfirst(lc); - - /* - * We assume that execution costs of the tlist as such were - * already accounted for. However, it still seems appropriate to - * charge something more for the executor's general costs of - * processing the added tuples. The cost is probably less than - * cpu_tuple_cost, though, so we arbitrarily use half of that. - */ - path->total_cost += path->rows * (tlist_rows - 1) * - cpu_tuple_cost / 2; - - path->rows *= tlist_rows; - } - /* No need to run set_cheapest; we're keeping all paths anyway. */ + /* Fix things up if final_target contains SRFs */ + if (parse->hasTargetSRFs) + adjust_paths_for_srfs(root, current_rel, + final_targets, + final_targets_contain_srfs); } /* @@ -5151,6 +5193,109 @@ get_cheapest_fractional_path(RelOptInfo *rel, double tuple_fraction) } /* + * adjust_paths_for_srfs + * Fix up the Paths of the given upperrel to handle tSRFs properly. + * + * The executor can only handle set-returning functions that appear at the + * top level of the targetlist of a Result plan node. If we have any SRFs + * that are not at top level, we need to split up the evaluation into multiple + * plan levels in which each level satisfies this constraint. This function + * modifies each Path of an upperrel that (might) compute any SRFs in its + * output tlist to insert appropriate projection steps. + * + * The given targets and targets_contain_srfs lists are from + * split_pathtarget_at_srfs(). We assume the existing Paths emit the first + * target in targets. + */ +static void +adjust_paths_for_srfs(PlannerInfo *root, RelOptInfo *rel, + List *targets, List *targets_contain_srfs) +{ + ListCell *lc; + + Assert(list_length(targets) == list_length(targets_contain_srfs)); + Assert(!linitial_int(targets_contain_srfs)); + + /* If no SRFs appear at this plan level, nothing to do */ + if (list_length(targets) == 1) + return; + + /* + * Stack SRF-evaluation nodes atop each path for the rel. + * + * In principle we should re-run set_cheapest() here to identify the + * cheapest path, but it seems unlikely that adding the same tlist eval + * costs to all the paths would change that, so we don't bother. Instead, + * just assume that the cheapest-startup and cheapest-total paths remain + * so. (There should be no parameterized paths anymore, so we needn't + * worry about updating cheapest_parameterized_paths.) + */ + foreach(lc, rel->pathlist) + { + Path *subpath = (Path *) lfirst(lc); + Path *newpath = subpath; + ListCell *lc1, + *lc2; + + Assert(subpath->param_info == NULL); + forboth(lc1, targets, lc2, targets_contain_srfs) + { + PathTarget *thistarget = (PathTarget *) lfirst(lc1); + bool contains_srfs = (bool) lfirst_int(lc2); + + /* If this level doesn't contain SRFs, do regular projection */ + if (contains_srfs) + newpath = (Path *) create_srf_projection_path(root, + rel, + newpath, + thistarget); + else + newpath = (Path *) apply_projection_to_path(root, + rel, + newpath, + thistarget); + } + lfirst(lc) = newpath; + if (subpath == rel->cheapest_startup_path) + rel->cheapest_startup_path = newpath; + if (subpath == rel->cheapest_total_path) + rel->cheapest_total_path = newpath; + } + + /* Likewise for partial paths, if any */ + foreach(lc, rel->partial_pathlist) + { + Path *subpath = (Path *) lfirst(lc); + Path *newpath = subpath; + ListCell *lc1, + *lc2; + + Assert(subpath->param_info == NULL); + forboth(lc1, targets, lc2, targets_contain_srfs) + { + PathTarget *thistarget = (PathTarget *) lfirst(lc1); + bool contains_srfs = (bool) lfirst_int(lc2); + + /* If this level doesn't contain SRFs, do regular projection */ + if (contains_srfs) + newpath = (Path *) create_srf_projection_path(root, + rel, + newpath, + thistarget); + else + { + /* avoid apply_projection_to_path, in case of multiple refs */ + newpath = (Path *) create_projection_path(root, + rel, + newpath, + thistarget); + } + } + lfirst(lc) = newpath; + } +} + +/* * expression_planner * Perform planner's transformations on a standalone expression. * diff --git a/src/backend/optimizer/util/clauses.c b/src/backend/optimizer/util/clauses.c index 59ccdf43d4..a763c7fe24 100644 --- a/src/backend/optimizer/util/clauses.c +++ b/src/backend/optimizer/util/clauses.c @@ -99,7 +99,6 @@ static bool contain_agg_clause_walker(Node *node, void *context); static bool get_agg_clause_costs_walker(Node *node, get_agg_clause_costs_context *context); static bool find_window_functions_walker(Node *node, WindowFuncLists *lists); -static bool expression_returns_set_rows_walker(Node *node, double *count); static bool contain_subplans_walker(Node *node, void *context); static bool contain_mutable_functions_walker(Node *node, void *context); static bool contain_volatile_functions_walker(Node *node, void *context); @@ -790,114 +789,37 @@ find_window_functions_walker(Node *node, WindowFuncLists *lists) /* * expression_returns_set_rows * Estimate the number of rows returned by a set-returning expression. - * The result is 1 if there are no set-returning functions. + * The result is 1 if it's not a set-returning expression. * - * We use the product of the rowcount estimates of all the functions in - * the given tree (this corresponds to the behavior of ExecMakeFunctionResult - * for nested set-returning functions). + * We should only examine the top-level function or operator; it used to be + * appropriate to recurse, but not anymore. (Even if there are more SRFs in + * the function's inputs, their multipliers are accounted for separately.) * * Note: keep this in sync with expression_returns_set() in nodes/nodeFuncs.c. */ double expression_returns_set_rows(Node *clause) { - double result = 1; - - (void) expression_returns_set_rows_walker(clause, &result); - return clamp_row_est(result); -} - -static bool -expression_returns_set_rows_walker(Node *node, double *count) -{ - if (node == NULL) - return false; - if (IsA(node, FuncExpr)) + if (clause == NULL) + return 1.0; + if (IsA(clause, FuncExpr)) { - FuncExpr *expr = (FuncExpr *) node; + FuncExpr *expr = (FuncExpr *) clause; if (expr->funcretset) - *count *= get_func_rows(expr->funcid); + return clamp_row_est(get_func_rows(expr->funcid)); } - if (IsA(node, OpExpr)) + if (IsA(clause, OpExpr)) { - OpExpr *expr = (OpExpr *) node; + OpExpr *expr = (OpExpr *) clause; if (expr->opretset) { set_opfuncid(expr); - *count *= get_func_rows(expr->opfuncid); + return clamp_row_est(get_func_rows(expr->opfuncid)); } } - - /* Avoid recursion for some cases that can't return a set */ - if (IsA(node, Aggref)) - return false; - if (IsA(node, WindowFunc)) - return false; - if (IsA(node, DistinctExpr)) - return false; - if (IsA(node, NullIfExpr)) - return false; - if (IsA(node, ScalarArrayOpExpr)) - return false; - if (IsA(node, BoolExpr)) - return false; - if (IsA(node, SubLink)) - return false; - if (IsA(node, SubPlan)) - return false; - if (IsA(node, AlternativeSubPlan)) - return false; - if (IsA(node, ArrayExpr)) - return false; - if (IsA(node, RowExpr)) - return false; - if (IsA(node, RowCompareExpr)) - return false; - if (IsA(node, CoalesceExpr)) - return false; - if (IsA(node, MinMaxExpr)) - return false; - if (IsA(node, XmlExpr)) - return false; - - return expression_tree_walker(node, expression_returns_set_rows_walker, - (void *) count); -} - -/* - * tlist_returns_set_rows - * Estimate the number of rows returned by a set-returning targetlist. - * The result is 1 if there are no set-returning functions. - * - * Here, the result is the largest rowcount estimate of any of the tlist's - * expressions, not the product as you would get from naively applying - * expression_returns_set_rows() to the whole tlist. The behavior actually - * implemented by ExecTargetList produces a number of rows equal to the least - * common multiple of the expression rowcounts, so that the product would be - * a worst-case estimate that is typically not realistic. Taking the max as - * we do here is a best-case estimate that might not be realistic either, - * but it's probably closer for typical usages. We don't try to compute the - * actual LCM because we're working with very approximate estimates, so their - * LCM would be unduly noisy. - */ -double -tlist_returns_set_rows(List *tlist) -{ - double result = 1; - ListCell *lc; - - foreach(lc, tlist) - { - TargetEntry *tle = (TargetEntry *) lfirst(lc); - double colresult; - - colresult = expression_returns_set_rows((Node *) tle->expr); - if (result < colresult) - result = colresult; - } - return result; + return 1.0; } diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index 3b7c56d3c7..aa635fd057 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -2227,6 +2227,9 @@ create_projection_path(PlannerInfo *root, (cpu_tuple_cost + target->cost.per_tuple) * subpath->rows; } + /* Assume no SRFs around */ + pathnode->srfpp = false; + return pathnode; } @@ -2320,6 +2323,78 @@ apply_projection_to_path(PlannerInfo *root, } /* + * create_srf_projection_path + * Creates a pathnode that represents performing a SRF projection. + * + * For the moment, we just use ProjectionPath for this, and generate a + * Result plan node. That's likely to change. + * + * 'rel' is the parent relation associated with the result + * 'subpath' is the path representing the source of data + * 'target' is the PathTarget to be computed + */ +ProjectionPath * +create_srf_projection_path(PlannerInfo *root, + RelOptInfo *rel, + Path *subpath, + PathTarget *target) +{ + ProjectionPath *pathnode = makeNode(ProjectionPath); + double tlist_rows; + ListCell *lc; + + pathnode->path.pathtype = T_Result; + pathnode->path.parent = rel; + pathnode->path.pathtarget = target; + /* For now, assume we are above any joins, so no parameterization */ + pathnode->path.param_info = NULL; + pathnode->path.parallel_aware = false; + pathnode->path.parallel_safe = rel->consider_parallel && + subpath->parallel_safe && + is_parallel_safe(root, (Node *) target->exprs); + pathnode->path.parallel_workers = subpath->parallel_workers; + /* Projection does not change the sort order */ + pathnode->path.pathkeys = subpath->pathkeys; + + pathnode->subpath = subpath; + + /* Always need the Result node */ + pathnode->dummypp = false; + pathnode->srfpp = true; + + /* + * Estimate number of rows produced by SRFs for each row of input; if + * there's more than one in this node, use the maximum. + */ + tlist_rows = 1; + foreach(lc, target->exprs) + { + Node *node = (Node *) lfirst(lc); + double itemrows; + + itemrows = expression_returns_set_rows(node); + if (tlist_rows < itemrows) + tlist_rows = itemrows; + } + + /* + * In addition to the cost of evaluating the tlist, charge cpu_tuple_cost + * per input row, and half of cpu_tuple_cost for each added output row. + * This is slightly bizarre maybe, but it's what 9.6 did; we may revisit + * this estimate later. + */ + pathnode->path.rows = subpath->rows * tlist_rows; + pathnode->path.startup_cost = subpath->startup_cost + + target->cost.startup; + pathnode->path.total_cost = subpath->total_cost + + target->cost.startup + + (cpu_tuple_cost + target->cost.per_tuple) * subpath->rows + + (pathnode->path.rows - subpath->rows) * cpu_tuple_cost / 2; + + return pathnode; +} + +/* * create_sort_path * Creates a pathnode that represents performing an explicit sort. * diff --git a/src/backend/optimizer/util/tlist.c b/src/backend/optimizer/util/tlist.c index 45205a830f..4e92ebdf41 100644 --- a/src/backend/optimizer/util/tlist.c +++ b/src/backend/optimizer/util/tlist.c @@ -16,9 +16,20 @@ #include "nodes/makefuncs.h" #include "nodes/nodeFuncs.h" +#include "optimizer/cost.h" #include "optimizer/tlist.h" +typedef struct +{ + List *nextlevel_tlist; + bool nextlevel_contains_srfs; +} split_pathtarget_context; + +static bool split_pathtarget_walker(Node *node, + split_pathtarget_context *context); + + /***************************************************************************** * Target list creation and searching utilities *****************************************************************************/ @@ -759,3 +770,191 @@ apply_pathtarget_labeling_to_tlist(List *tlist, PathTarget *target) i++; } } + +/* + * split_pathtarget_at_srfs + * Split given PathTarget into multiple levels to position SRFs safely + * + * The executor can only handle set-returning functions that appear at the + * top level of the targetlist of a Result plan node. If we have any SRFs + * that are not at top level, we need to split up the evaluation into multiple + * plan levels in which each level satisfies this constraint. This function + * creates appropriate PathTarget(s) for each level. + * + * As an example, consider the tlist expression + * x + srf1(srf2(y + z)) + * This expression should appear as-is in the top PathTarget, but below that + * we must have a PathTarget containing + * x, srf1(srf2(y + z)) + * and below that, another PathTarget containing + * x, srf2(y + z) + * and below that, another PathTarget containing + * x, y, z + * When these tlists are processed by setrefs.c, subexpressions that match + * output expressions of the next lower tlist will be replaced by Vars, + * so that what the executor gets are tlists looking like + * Var1 + Var2 + * Var1, srf1(Var2) + * Var1, srf2(Var2 + Var3) + * x, y, z + * which satisfy the desired property. + * + * In some cases, a SRF has already been evaluated in some previous plan level + * and we shouldn't expand it again (that is, what we see in the target is + * already meant as a reference to a lower subexpression). So, don't expand + * any tlist expressions that appear in input_target, if that's not NULL. + * In principle we might need to consider matching subexpressions to + * input_target, but for now it's not necessary because only ORDER BY and + * GROUP BY expressions are at issue and those will look the same at both + * plan levels. + * + * The outputs of this function are two parallel lists, one a list of + * PathTargets and the other an integer list of bool flags indicating + * whether the corresponding PathTarget contains any top-level SRFs. + * The lists are given in the order they'd need to be evaluated in, with + * the "lowest" PathTarget first. So the last list entry is always the + * originally given PathTarget, and any entries before it indicate evaluation + * levels that must be inserted below it. The first list entry must not + * contain any SRFs, since it will typically be attached to a plan node + * that cannot evaluate SRFs. + * + * Note: using a list for the flags may seem like overkill, since there + * are only a few possible patterns for which levels contain SRFs. + * But this representation decouples callers from that knowledge. + */ +void +split_pathtarget_at_srfs(PlannerInfo *root, + PathTarget *target, PathTarget *input_target, + List **targets, List **targets_contain_srfs) +{ + /* Initialize output lists to empty; we prepend to them within loop */ + *targets = *targets_contain_srfs = NIL; + + /* Loop to consider each level of PathTarget we need */ + for (;;) + { + bool target_contains_srfs = false; + split_pathtarget_context context; + ListCell *lc; + + context.nextlevel_tlist = NIL; + context.nextlevel_contains_srfs = false; + + /* + * Scan the PathTarget looking for SRFs. Top-level SRFs are handled + * in this loop, ones lower down are found by split_pathtarget_walker. + */ + foreach(lc, target->exprs) + { + Node *node = (Node *) lfirst(lc); + + /* + * A tlist item that is just a reference to an expression already + * computed in input_target need not be evaluated here, so just + * make sure it's included in the next PathTarget. + */ + if (input_target && list_member(input_target->exprs, node)) + { + context.nextlevel_tlist = lappend(context.nextlevel_tlist, node); + continue; + } + + /* Else, we need to compute this expression. */ + if (IsA(node, FuncExpr) && + ((FuncExpr *) node)->funcretset) + { + /* Top-level SRF: it can be evaluated here */ + target_contains_srfs = true; + /* Recursively examine SRF's inputs */ + split_pathtarget_walker((Node *) ((FuncExpr *) node)->args, + &context); + } + else if (IsA(node, OpExpr) && + ((OpExpr *) node)->opretset) + { + /* Same as above, but for set-returning operator */ + target_contains_srfs = true; + split_pathtarget_walker((Node *) ((OpExpr *) node)->args, + &context); + } + else + { + /* Not a top-level SRF, so recursively examine expression */ + split_pathtarget_walker(node, &context); + } + } + + /* + * Prepend current target and associated flag to output lists. + */ + *targets = lcons(target, *targets); + *targets_contain_srfs = lcons_int(target_contains_srfs, + *targets_contain_srfs); + + /* + * Done if we found no SRFs anywhere in this target; the tentative + * tlist we built for the next level can be discarded. + */ + if (!target_contains_srfs && !context.nextlevel_contains_srfs) + break; + + /* + * Else build the next PathTarget down, and loop back to process it. + * Copy the subexpressions to make sure PathTargets don't share + * substructure (might be unnecessary, but be safe); and drop any + * duplicate entries in the sub-targetlist. + */ + target = create_empty_pathtarget(); + add_new_columns_to_pathtarget(target, + (List *) copyObject(context.nextlevel_tlist)); + set_pathtarget_cost_width(root, target); + } +} + +/* Recursively examine expressions for split_pathtarget_at_srfs */ +static bool +split_pathtarget_walker(Node *node, split_pathtarget_context *context) +{ + if (node == NULL) + return false; + if (IsA(node, Var) || + IsA(node, PlaceHolderVar) || + IsA(node, Aggref) || + IsA(node, GroupingFunc) || + IsA(node, WindowFunc)) + { + /* + * Pass these items down to the child plan level for evaluation. + * + * We assume that these constructs cannot contain any SRFs (if one + * does, there will be an executor failure from a misplaced SRF). + */ + context->nextlevel_tlist = lappend(context->nextlevel_tlist, node); + + /* Having done that, we need not examine their sub-structure */ + return false; + } + else if ((IsA(node, FuncExpr) && + ((FuncExpr *) node)->funcretset) || + (IsA(node, OpExpr) && + ((OpExpr *) node)->opretset)) + { + /* + * Pass SRFs down to the child plan level for evaluation, and mark + * that it contains SRFs. (We are not at top level of our own tlist, + * else this would have been picked up by split_pathtarget_at_srfs.) + */ + context->nextlevel_tlist = lappend(context->nextlevel_tlist, node); + context->nextlevel_contains_srfs = true; + + /* Inputs to the SRF need not be considered here, so we're done */ + return false; + } + + /* + * Otherwise, the node is evaluatable within the current PathTarget, so + * recurse to examine its inputs. + */ + return expression_tree_walker(node, split_pathtarget_walker, + (void *) context); +} diff --git a/src/include/nodes/relation.h b/src/include/nodes/relation.h index e1d31c795a..de4092d679 100644 --- a/src/include/nodes/relation.h +++ b/src/include/nodes/relation.h @@ -1293,6 +1293,7 @@ typedef struct ProjectionPath Path path; Path *subpath; /* path representing input source */ bool dummypp; /* true if no separate Result is needed */ + bool srfpp; /* true if SRFs are being evaluated here */ } ProjectionPath; /* diff --git a/src/include/optimizer/clauses.h b/src/include/optimizer/clauses.h index 6173ef8d75..cc0d7b0a26 100644 --- a/src/include/optimizer/clauses.h +++ b/src/include/optimizer/clauses.h @@ -54,7 +54,6 @@ extern bool contain_window_function(Node *clause); extern WindowFuncLists *find_window_functions(Node *clause, Index maxWinRef); extern double expression_returns_set_rows(Node *clause); -extern double tlist_returns_set_rows(List *tlist); extern bool contain_subplans(Node *clause); diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index d16f879fc1..c11c59df23 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -144,6 +144,10 @@ extern Path *apply_projection_to_path(PlannerInfo *root, RelOptInfo *rel, Path *path, PathTarget *target); +extern ProjectionPath *create_srf_projection_path(PlannerInfo *root, + RelOptInfo *rel, + Path *subpath, + PathTarget *target); extern SortPath *create_sort_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath, diff --git a/src/include/optimizer/tlist.h b/src/include/optimizer/tlist.h index f80b31a673..976024a164 100644 --- a/src/include/optimizer/tlist.h +++ b/src/include/optimizer/tlist.h @@ -61,6 +61,9 @@ extern void add_column_to_pathtarget(PathTarget *target, extern void add_new_column_to_pathtarget(PathTarget *target, Expr *expr); extern void add_new_columns_to_pathtarget(PathTarget *target, List *exprs); extern void apply_pathtarget_labeling_to_tlist(List *tlist, PathTarget *target); +extern void split_pathtarget_at_srfs(PlannerInfo *root, + PathTarget *target, PathTarget *input_target, + List **targets, List **targets_contain_srfs); /* Convenience macro to get a PathTarget with valid cost/width fields */ #define create_pathtarget(root, tlist) \ diff --git a/src/test/regress/expected/aggregates.out b/src/test/regress/expected/aggregates.out index fa1f5e7879..b71d81ee21 100644 --- a/src/test/regress/expected/aggregates.out +++ b/src/test/regress/expected/aggregates.out @@ -823,7 +823,8 @@ explain (costs off) -> Index Only Scan Backward using tenk1_unique2 on tenk1 Index Cond: (unique2 IS NOT NULL) -> Result -(7 rows) + -> Result +(8 rows) select max(unique2), generate_series(1,3) as g from tenk1 order by g desc; max | g diff --git a/src/test/regress/expected/limit.out b/src/test/regress/expected/limit.out index 9c3eecfc3b..a7ded3ad05 100644 --- a/src/test/regress/expected/limit.out +++ b/src/test/regress/expected/limit.out @@ -208,13 +208,15 @@ select currval('testseq'); explain (verbose, costs off) select unique1, unique2, generate_series(1,10) from tenk1 order by unique2 limit 7; - QUERY PLAN ----------------------------------------------------------- + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------- Limit Output: unique1, unique2, (generate_series(1, 10)) - -> Index Scan using tenk1_unique2 on public.tenk1 + -> Result Output: unique1, unique2, generate_series(1, 10) -(4 rows) + -> Index Scan using tenk1_unique2 on public.tenk1 + Output: unique1, unique2, two, four, ten, twenty, hundred, thousand, twothousand, fivethous, tenthous, odd, even, stringu1, stringu2, string4 +(6 rows) select unique1, unique2, generate_series(1,10) from tenk1 order by unique2 limit 7; diff --git a/src/test/regress/expected/rangefuncs.out b/src/test/regress/expected/rangefuncs.out index f06cfa4b21..9634fa16d2 100644 --- a/src/test/regress/expected/rangefuncs.out +++ b/src/test/regress/expected/rangefuncs.out @@ -1995,12 +1995,10 @@ SELECT *, END) FROM (VALUES (1,''), (2,'0000000049404'), (3,'FROM 10000000876')) v(id, str); - id | str | lower -----+------------------+------------------ - 1 | | - 2 | 0000000049404 | 49404 - 3 | FROM 10000000876 | from 10000000876 -(3 rows) + id | str | lower +----+---------------+------- + 2 | 0000000049404 | 49404 +(1 row) -- check whole-row-Var handling in nested lateral functions (bug #11703) create function extractq2(t int8_tbl) returns int8 as $$ diff --git a/src/test/regress/expected/subselect.out b/src/test/regress/expected/subselect.out index eda319d24b..3ed089aa46 100644 --- a/src/test/regress/expected/subselect.out +++ b/src/test/regress/expected/subselect.out @@ -807,24 +807,28 @@ select * from int4_tbl where explain (verbose, costs off) select * from int4_tbl o where (f1, f1) in (select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1); - QUERY PLAN ----------------------------------------------------------------- - Hash Semi Join + QUERY PLAN +------------------------------------------------------------------- + Nested Loop Semi Join Output: o.f1 - Hash Cond: (o.f1 = "ANY_subquery".f1) + Join Filter: (o.f1 = "ANY_subquery".f1) -> Seq Scan on public.int4_tbl o Output: o.f1 - -> Hash + -> Materialize Output: "ANY_subquery".f1, "ANY_subquery".g -> Subquery Scan on "ANY_subquery" Output: "ANY_subquery".f1, "ANY_subquery".g Filter: ("ANY_subquery".f1 = "ANY_subquery".g) - -> HashAggregate - Output: i.f1, (generate_series(1, 2) / 10) - Group Key: i.f1 - -> Seq Scan on public.int4_tbl i - Output: i.f1 -(15 rows) + -> Result + Output: i.f1, ((generate_series(1, 2)) / 10) + -> Result + Output: i.f1, generate_series(1, 2) + -> HashAggregate + Output: i.f1 + Group Key: i.f1 + -> Seq Scan on public.int4_tbl i + Output: i.f1 +(19 rows) select * from int4_tbl o where (f1, f1) in (select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1); diff --git a/src/test/regress/expected/tsrf.out b/src/test/regress/expected/tsrf.out index 7bb6d17fcb..f257537925 100644 --- a/src/test/regress/expected/tsrf.out +++ b/src/test/regress/expected/tsrf.out @@ -43,7 +43,16 @@ SELECT generate_series(1, generate_series(1, 3)); -- srf, with two SRF arguments SELECT generate_series(generate_series(1,3), generate_series(2, 4)); -ERROR: functions and operators can take at most one set argument + generate_series +----------------- + 1 + 2 + 2 + 3 + 3 + 4 +(6 rows) + CREATE TABLE few(id int, dataa text, datab text); INSERT INTO few VALUES(1, 'a', 'foo'),(2, 'a', 'bar'),(3, 'b', 'bar'); -- SRF output order of sorting is maintained, if SRF is not referenced -- 2.11.0.22.g8d7a455.dirty