From d1662203f8463fefe28224f5b0eff4fd6e4981ba Mon Sep 17 00:00:00 2001 From: Nils Dijk Date: Tue, 21 Dec 2021 16:50:42 +0000 Subject: [PATCH 2/3] refactor add_path_decision into path_compare --- src/backend/optimizer/util/pathnode.c | 371 +++++++++++++++----------- src/include/optimizer/pathnode.h | 8 + 2 files changed, 229 insertions(+), 150 deletions(-) diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c index c54f947515..f333069872 100644 --- a/src/backend/optimizer/util/pathnode.c +++ b/src/backend/optimizer/util/pathnode.c @@ -35,14 +35,6 @@ #include "utils/memutils.h" #include "utils/selfuncs.h" -typedef enum -{ - COSTS_EQUAL, /* path costs are fuzzily equal */ - COSTS_BETTER1, /* first path is cheaper than second */ - COSTS_BETTER2, /* second path is cheaper than first */ - COSTS_DIFFERENT /* neither path dominates the other on cost */ -} PathCostComparison; - typedef enum { REJECTED, /* Reject the propesed path */ @@ -169,7 +161,7 @@ compare_fractional_path_costs(Path *path1, Path *path2, * (But if total costs are fuzzily equal, we compare startup costs anyway, * in hopes of eliminating one path or the other.) */ -static PathCostComparison +static PathComparison compare_path_costs_fuzzily(Path *path1, Path *path2, double fuzz_factor) { #define CONSIDER_PATH_STARTUP_COST(p) \ @@ -186,10 +178,10 @@ compare_path_costs_fuzzily(Path *path1, Path *path2, double fuzz_factor) path2->startup_cost > path1->startup_cost * fuzz_factor) { /* ... but path2 fuzzily worse on startup, so DIFFERENT */ - return COSTS_DIFFERENT; + return PATHS_DIFFERENT; } /* else path2 dominates */ - return COSTS_BETTER2; + return PATHS_BETTER2; } if (path2->total_cost > path1->total_cost * fuzz_factor) { @@ -198,24 +190,24 @@ compare_path_costs_fuzzily(Path *path1, Path *path2, double fuzz_factor) path1->startup_cost > path2->startup_cost * fuzz_factor) { /* ... but path1 fuzzily worse on startup, so DIFFERENT */ - return COSTS_DIFFERENT; + return PATHS_DIFFERENT; } /* else path1 dominates */ - return COSTS_BETTER1; + return PATHS_BETTER1; } /* fuzzily the same on total cost ... */ if (path1->startup_cost > path2->startup_cost * fuzz_factor) { /* ... but path1 fuzzily worse on startup, so path2 wins */ - return COSTS_BETTER2; + return PATHS_BETTER2; } if (path2->startup_cost > path1->startup_cost * fuzz_factor) { /* ... but path2 fuzzily worse on startup, so path1 wins */ - return COSTS_BETTER1; + return PATHS_BETTER1; } /* fuzzily the same on both costs */ - return COSTS_EQUAL; + return PATHS_EQUAL; #undef CONSIDER_PATH_STARTUP_COST } @@ -368,6 +360,207 @@ set_cheapest(RelOptInfo *parent_rel) } +/* + * path_compare_pathkeys + * Given two paths, compare the paths based on their pathkeys (sort order). + * As per discussion in discussion in src/backend/optimizer/README we treat + * a path as having no sort order when the paths are parameterized. + * + * Returns whether paths are EQUAL, DIFFERENT or either PATH1 or PATH2 is + * better. + */ +static PathComparison +path_compare_pathkeys(Path *path1, Path *path2) +{ + PathKeysComparison keyscmp; + List *pathkeys1; + List *pathkeys2; + + /* Pretend parameterized paths have no pathkeys, per comment above */ + pathkeys1 = path1->param_info ? NIL : path1->pathkeys; + pathkeys2 = path2->param_info ? NIL : path2->pathkeys; + keyscmp = compare_pathkeys(pathkeys1, pathkeys2); + switch (keyscmp) + { + case PATHKEYS_EQUAL: + return PATHS_EQUAL; + case PATHKEYS_BETTER1: + return PATHS_BETTER1; + case PATHKEYS_BETTER2: + return PATHS_BETTER2; + case PATHKEYS_DIFFERENT: + return PATHS_DIFFERENT; + default: + /* should not happen, treat as different */ + return PATHS_DIFFERENT; + } +} + + +/* + * path_compare_param_info + * Given two paths, compare the paths based on the required relations for + * based on their parameterization. + * + * Returns whether paths are EQUAL, DIFFERENT or either PATH1 or PATH2 is + * better. + */ +static PathComparison +path_compare_param_info(Path *path1, Path *path2) +{ + BMS_Comparison outercmp = bms_subset_compare(PATH_REQ_OUTER(path1), + PATH_REQ_OUTER(path2)); + + switch (outercmp) + { + case BMS_EQUAL: + return PATHS_EQUAL; + case BMS_SUBSET1: + return PATHS_BETTER1; + case BMS_SUBSET2: + return PATHS_BETTER2; + case BMS_DIFFERENT: + return PATHS_DIFFERENT; + default: + /* should not happen, treat as different */ + return PATHS_DIFFERENT; + } +} + + +/* + * path_compare_rows + * Given two paths, compare the paths based on the estimated number of rows + * they return. + * + * Returns whether paths are EQUAL, DIFFERENT or either PATH1 or PATH2 is + * better. + */ +static PathComparison +path_compare_rows(Path *path1, Path *path2) +{ + if (path1->rows < path2->rows) + return PATHS_BETTER1; + if (path1->rows > path2->rows) + return PATHS_BETTER2; + return PATHS_EQUAL; +} + + +/* + * path_compare_parallel_safe + * Given two paths, compare the paths based on the whether they are safe to + * run in parallel. We find paths better if they are able to be run in + * parallel. + * + * Returns whether paths are EQUAL, DIFFERENT or either PATH1 or PATH2 is + * better. + */ +static PathComparison +path_compare_parallel_safe(Path *path1, Path *path2) +{ + if (path1->parallel_safe > path2->parallel_safe) + return PATHS_BETTER1; + if (path1->parallel_safe < path2->parallel_safe) + return PATHS_BETTER2; + return PATHS_EQUAL; +} + + +/* + * path_compare + * Given two paths we decide if path1 is better, path2 is better, the paths + * are equal or the paths are different. To come to a conclusion we combine + * the result of multiple dimensions on which paths can differentiate from + * each other. + * - cost + * - sorting + * - parameterization + * - rowcount + * - parallel safety + * + * Mostly these dimensions are compared and combined on their own with few + * exceptions: + * - sorting gets treated as equal for parameterized paths as per + * discussion in src/backend/optimizer/README + * - rowcount and parallel safety are only combined if we are leaning + * towards a dominating path. Otherwise we treat parallel_safety always + * over rowcount, and rowcount over a smaller fuzzyness cost compare. + */ +static PathComparison +path_compare(Path *path1, Path *path2) +{ + PathComparison cmp = PATHS_EQUAL; + + /* do a fuzzy cost comparison with standard fuzziness limit */ + cmp |= compare_path_costs_fuzzily(path1, path2, STD_FUZZ_FACTOR); + if (cmp == PATHS_DIFFERENT) + return cmp; + + /* compare paths based on pathkeys and combine results */ + cmp |= path_compare_pathkeys(path1, path2); + if (cmp == PATHS_DIFFERENT) + return cmp; + + /* compare paths on parameterization */ + cmp |= path_compare_param_info(path1, path2); + if (cmp == PATHS_DIFFERENT) + return cmp; + + /* Keep compatibility with the original decision tree from add_path */ + if (cmp != PATHS_EQUAL) + { + /* + * When paths are not deemed equal by the earlier dimensions we + * compare them on, we treat both rowcount and parallel_safe as two + * extra dimensions we can compare two paths on. To keep consistency + * we call separated functions to compare both and merge to the final + * result. + */ + + cmp |= path_compare_rows(path1, path2); + if (cmp == PATHS_DIFFERENT) + return cmp; + + cmp |= path_compare_parallel_safe(path1, path2); + if (cmp == PATHS_DIFFERENT) + return cmp; + } + else + { + /* + * Only when all previous comparisons treat the paths equal we fall + * back to the retained logics of choosing which path is better: + * + * Same pathkeys and outer rels, and fuzzily the same cost, so keep + * just one; to decide which, first check parallel-safety, then rows, + * then do a fuzzy cost comparison with very small fuzz limit. (We + * used to do an exact cost comparison, but that results in annoying + * platform-specific plan variations due to roundoff in the cost + * estimates.) If things are still tied, arbitrarily keep only the + * old path. Notice that we will keep only the old path even if the + * less-fuzzy comparison decides the startup and total costs compare + * differently. + */ + if (path1->parallel_safe > path2->parallel_safe) + return PATHS_BETTER1; + else if (path1->parallel_safe < path2->parallel_safe) + return PATHS_BETTER2; + else if (path1->rows < path2->rows) + return PATHS_BETTER1; + else if (path1->rows > path2->rows) + return PATHS_BETTER2; + else if (compare_path_costs_fuzzily(path1, + path2, + 1.0000000001) == PATHS_BETTER1) + return PATHS_BETTER1; + else + return PATHS_EQUAL; + } + + return cmp; +} + /* * add_path_decision * Takes a new_path and an old_path. Based on the paths it makes a decision @@ -400,143 +593,21 @@ set_cheapest(RelOptInfo *parent_rel) static AddPathDecision add_path_decision(Path *new_path, Path *old_path) { - PathCostComparison costcmp; - PathKeysComparison keyscmp; - BMS_Comparison outercmp; - - List *new_path_pathkeys; - List *old_path_pathkeys; + PathComparison cmp = path_compare(new_path, old_path); - /* - * Do a fuzzy cost comparison with standard fuzziness limit. - */ - costcmp = compare_path_costs_fuzzily(new_path, old_path, - STD_FUZZ_FACTOR); - - /* - * If the two paths compare differently for startup and total cost, then - * we want to keep both, and we can skip comparing pathkeys and - * required_outer rels. If they compare the same, proceed with the other - * comparisons. Row count is checked last. (We make the tests in this - * order because the cost comparison is most likely to turn out - * "different", and the pathkeys comparison next most likely. As - * explained above, row count very seldom makes a difference, so even - * though it's cheap to compare there's not much point in checking it - * earlier.) - */ - if (costcmp == COSTS_DIFFERENT) - { - return CONTINUE; - } - - /* Pretend parameterized paths have no pathkeys, per comment above */ - new_path_pathkeys = new_path->param_info ? NIL : new_path->pathkeys; - old_path_pathkeys = old_path->param_info ? NIL : old_path->pathkeys; - keyscmp = compare_pathkeys(new_path_pathkeys, old_path_pathkeys); - if (keyscmp == PATHKEYS_DIFFERENT) + switch (cmp) { - return CONTINUE; - } - - switch (costcmp) - { - case COSTS_EQUAL: - outercmp = bms_subset_compare(PATH_REQ_OUTER(new_path), - PATH_REQ_OUTER(old_path)); - if (keyscmp == PATHKEYS_BETTER1) - { - if ((outercmp == BMS_EQUAL || - outercmp == BMS_SUBSET1) && - new_path->rows <= old_path->rows && - new_path->parallel_safe >= old_path->parallel_safe) - return DOMINATES; /* new dominates old */ - } - else if (keyscmp == PATHKEYS_BETTER2) - { - if ((outercmp == BMS_EQUAL || - outercmp == BMS_SUBSET2) && - new_path->rows >= old_path->rows && - new_path->parallel_safe <= old_path->parallel_safe) - return REJECTED; /* old dominates new */ - } - else /* keyscmp == PATHKEYS_EQUAL */ - { - if (outercmp == BMS_EQUAL) - { - /* - * Same pathkeys and outer rels, and fuzzily the same - * cost, so keep just one; to decide which, first check - * parallel-safety, then rows, then do a fuzzy cost - * comparison with very small fuzz limit. (We used to do - * an exact cost comparison, but that results in annoying - * platform-specific plan variations due to roundoff in - * the cost estimates.) If things are still tied, - * arbitrarily keep only the old path. Notice that we - * will keep only the old path even if the less-fuzzy - * comparison decides the startup and total costs compare - * differently. - */ - if (new_path->parallel_safe > - old_path->parallel_safe) - return DOMINATES; /* new dominates old */ - else if (new_path->parallel_safe < - old_path->parallel_safe) - return REJECTED; /* old dominates new */ - else if (new_path->rows < old_path->rows) - return DOMINATES; /* new dominates old */ - else if (new_path->rows > old_path->rows) - return REJECTED; /* old dominates new */ - else if (compare_path_costs_fuzzily(new_path, - old_path, - 1.0000000001) == COSTS_BETTER1) - return DOMINATES; /* new dominates old */ - else - return REJECTED; /* old equals or dominates new */ - } - else if (outercmp == BMS_SUBSET1 && - new_path->rows <= old_path->rows && - new_path->parallel_safe >= old_path->parallel_safe) - return DOMINATES; /* new dominates old */ - else if (outercmp == BMS_SUBSET2 && - new_path->rows >= old_path->rows && - new_path->parallel_safe <= old_path->parallel_safe) - return REJECTED; /* old dominates new */ - /* else different parameterizations, keep both */ - } - break; - case COSTS_BETTER1: - if (keyscmp != PATHKEYS_BETTER2) - { - outercmp = bms_subset_compare(PATH_REQ_OUTER(new_path), - PATH_REQ_OUTER(old_path)); - if ((outercmp == BMS_EQUAL || - outercmp == BMS_SUBSET1) && - new_path->rows <= old_path->rows && - new_path->parallel_safe >= old_path->parallel_safe) - return DOMINATES; /* new dominates old */ - } - break; - case COSTS_BETTER2: - if (keyscmp != PATHKEYS_BETTER1) - { - outercmp = bms_subset_compare(PATH_REQ_OUTER(new_path), - PATH_REQ_OUTER(old_path)); - if ((outercmp == BMS_EQUAL || - outercmp == BMS_SUBSET2) && - new_path->rows >= old_path->rows && - new_path->parallel_safe <= old_path->parallel_safe) - return REJECTED; /* old dominates new */ - } - break; - case COSTS_DIFFERENT: - - /* - * can't get here, but keep this case to keep compiler quiet - */ - break; + case PATHS_BETTER1: + return DOMINATES; + case PATHS_BETTER2: + case PATHS_EQUAL: /* when paths are equal only keep the oldest */ + return REJECTED; + case PATHS_DIFFERENT: + return CONTINUE; + default: + /* should not be reached */ + return CONTINUE; } - - return CONTINUE; } /* diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h index 2922c0cdc1..1ee4a91ab8 100644 --- a/src/include/optimizer/pathnode.h +++ b/src/include/optimizer/pathnode.h @@ -21,6 +21,14 @@ /* * prototypes for pathnode.c */ +typedef enum +{ + PATHS_EQUAL = 0, /* paths compare (potentially fuzzily) equal */ + PATHS_BETTER1 = 1, /* path1 is better on interesting dimensions */ + PATHS_BETTER2 = 2, /* path2 is better on interesting dimensions */ + PATHS_DIFFERENT = 3 /* no path is better on all dimensions */ +} PathComparison; + extern int compare_path_costs(Path *path1, Path *path2, CostSelector criterion); extern int compare_fractional_path_costs(Path *path1, Path *path2, -- 2.32.0 (Apple Git-132)