From d1662203f8463fefe28224f5b0eff4fd6e4981ba Mon Sep 17 00:00:00 2001
From: Nils Dijk <me@thanod.nl>
Date: Tue, 21 Dec 2021 16:50:42 +0000
Subject: [PATCH 2/3] refactor add_path_decision into path_compare

---
 src/backend/optimizer/util/pathnode.c | 371 +++++++++++++++-----------
 src/include/optimizer/pathnode.h      |   8 +
 2 files changed, 229 insertions(+), 150 deletions(-)

diff --git a/src/backend/optimizer/util/pathnode.c b/src/backend/optimizer/util/pathnode.c
index c54f947515..f333069872 100644
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -35,14 +35,6 @@
 #include "utils/memutils.h"
 #include "utils/selfuncs.h"
 
-typedef enum
-{
-	COSTS_EQUAL,				/* path costs are fuzzily equal */
-	COSTS_BETTER1,				/* first path is cheaper than second */
-	COSTS_BETTER2,				/* second path is cheaper than first */
-	COSTS_DIFFERENT				/* neither path dominates the other on cost */
-} PathCostComparison;
-
 typedef enum
 {
 	REJECTED,					/* Reject the propesed path */
@@ -169,7 +161,7 @@ compare_fractional_path_costs(Path *path1, Path *path2,
  * (But if total costs are fuzzily equal, we compare startup costs anyway,
  * in hopes of eliminating one path or the other.)
  */
-static PathCostComparison
+static PathComparison
 compare_path_costs_fuzzily(Path *path1, Path *path2, double fuzz_factor)
 {
 #define CONSIDER_PATH_STARTUP_COST(p)  \
@@ -186,10 +178,10 @@ compare_path_costs_fuzzily(Path *path1, Path *path2, double fuzz_factor)
 			path2->startup_cost > path1->startup_cost * fuzz_factor)
 		{
 			/* ... but path2 fuzzily worse on startup, so DIFFERENT */
-			return COSTS_DIFFERENT;
+			return PATHS_DIFFERENT;
 		}
 		/* else path2 dominates */
-		return COSTS_BETTER2;
+		return PATHS_BETTER2;
 	}
 	if (path2->total_cost > path1->total_cost * fuzz_factor)
 	{
@@ -198,24 +190,24 @@ compare_path_costs_fuzzily(Path *path1, Path *path2, double fuzz_factor)
 			path1->startup_cost > path2->startup_cost * fuzz_factor)
 		{
 			/* ... but path1 fuzzily worse on startup, so DIFFERENT */
-			return COSTS_DIFFERENT;
+			return PATHS_DIFFERENT;
 		}
 		/* else path1 dominates */
-		return COSTS_BETTER1;
+		return PATHS_BETTER1;
 	}
 	/* fuzzily the same on total cost ... */
 	if (path1->startup_cost > path2->startup_cost * fuzz_factor)
 	{
 		/* ... but path1 fuzzily worse on startup, so path2 wins */
-		return COSTS_BETTER2;
+		return PATHS_BETTER2;
 	}
 	if (path2->startup_cost > path1->startup_cost * fuzz_factor)
 	{
 		/* ... but path2 fuzzily worse on startup, so path1 wins */
-		return COSTS_BETTER1;
+		return PATHS_BETTER1;
 	}
 	/* fuzzily the same on both costs */
-	return COSTS_EQUAL;
+	return PATHS_EQUAL;
 
 #undef CONSIDER_PATH_STARTUP_COST
 }
@@ -368,6 +360,207 @@ set_cheapest(RelOptInfo *parent_rel)
 }
 
 
+/*
+ * path_compare_pathkeys
+ *    Given two paths, compare the paths based on their pathkeys (sort order).
+ *    As per discussion in discussion in src/backend/optimizer/README we treat
+ *    a path as having no sort order when the paths are parameterized.
+ *
+ * Returns whether paths are EQUAL, DIFFERENT or either PATH1 or PATH2 is
+ * better.
+ */
+static PathComparison
+path_compare_pathkeys(Path *path1, Path *path2)
+{
+	PathKeysComparison keyscmp;
+	List	   *pathkeys1;
+	List	   *pathkeys2;
+
+	/* Pretend parameterized paths have no pathkeys, per comment above */
+	pathkeys1 = path1->param_info ? NIL : path1->pathkeys;
+	pathkeys2 = path2->param_info ? NIL : path2->pathkeys;
+	keyscmp = compare_pathkeys(pathkeys1, pathkeys2);
+	switch (keyscmp)
+	{
+		case PATHKEYS_EQUAL:
+			return PATHS_EQUAL;
+		case PATHKEYS_BETTER1:
+			return PATHS_BETTER1;
+		case PATHKEYS_BETTER2:
+			return PATHS_BETTER2;
+		case PATHKEYS_DIFFERENT:
+			return PATHS_DIFFERENT;
+		default:
+			/* should not happen, treat as different */
+			return PATHS_DIFFERENT;
+	}
+}
+
+
+/*
+ * path_compare_param_info
+ *    Given two paths, compare the paths based on the required relations for
+ *    based on their parameterization.
+ *
+ * Returns whether paths are EQUAL, DIFFERENT or either PATH1 or PATH2 is
+ * better.
+ */
+static PathComparison
+path_compare_param_info(Path *path1, Path *path2)
+{
+	BMS_Comparison outercmp = bms_subset_compare(PATH_REQ_OUTER(path1),
+												 PATH_REQ_OUTER(path2));
+
+	switch (outercmp)
+	{
+		case BMS_EQUAL:
+			return PATHS_EQUAL;
+		case BMS_SUBSET1:
+			return PATHS_BETTER1;
+		case BMS_SUBSET2:
+			return PATHS_BETTER2;
+		case BMS_DIFFERENT:
+			return PATHS_DIFFERENT;
+		default:
+			/* should not happen, treat as different */
+			return PATHS_DIFFERENT;
+	}
+}
+
+
+/*
+ * path_compare_rows
+ *    Given two paths, compare the paths based on the estimated number of rows
+ *    they return.
+ *
+ * Returns whether paths are EQUAL, DIFFERENT or either PATH1 or PATH2 is
+ * better.
+ */
+static PathComparison
+path_compare_rows(Path *path1, Path *path2)
+{
+	if (path1->rows < path2->rows)
+		return PATHS_BETTER1;
+	if (path1->rows > path2->rows)
+		return PATHS_BETTER2;
+	return PATHS_EQUAL;
+}
+
+
+/*
+ * path_compare_parallel_safe
+ *    Given two paths, compare the paths based on the whether they are safe to
+ *    run in parallel. We find paths better if they are able to be run in
+ *    parallel.
+ *
+ * Returns whether paths are EQUAL, DIFFERENT or either PATH1 or PATH2 is
+ * better.
+ */
+static PathComparison
+path_compare_parallel_safe(Path *path1, Path *path2)
+{
+	if (path1->parallel_safe > path2->parallel_safe)
+		return PATHS_BETTER1;
+	if (path1->parallel_safe < path2->parallel_safe)
+		return PATHS_BETTER2;
+	return PATHS_EQUAL;
+}
+
+
+/*
+ * path_compare
+ *    Given two paths we decide if path1 is better, path2 is better, the paths
+ *    are equal or the paths are different. To come to a conclusion we combine
+ *    the result of multiple dimensions on which paths can differentiate from
+ *    each other.
+ *    - cost
+ *    - sorting
+ *    - parameterization
+ *    - rowcount
+ *    - parallel safety
+ *
+ *    Mostly these dimensions are compared and combined on their own with few
+ *    exceptions:
+ *    - sorting gets treated as equal for parameterized paths as per
+ *      discussion in src/backend/optimizer/README
+ *    - rowcount and parallel safety are only combined if we are leaning
+ *      towards a dominating path. Otherwise we treat parallel_safety always
+ *      over rowcount, and rowcount over a smaller fuzzyness cost compare.
+ */
+static PathComparison
+path_compare(Path *path1, Path *path2)
+{
+	PathComparison cmp = PATHS_EQUAL;
+
+	/* do a fuzzy cost comparison with standard fuzziness limit */
+	cmp |= compare_path_costs_fuzzily(path1, path2, STD_FUZZ_FACTOR);
+	if (cmp == PATHS_DIFFERENT)
+		return cmp;
+
+	/* compare paths based on pathkeys and combine results */
+	cmp |= path_compare_pathkeys(path1, path2);
+	if (cmp == PATHS_DIFFERENT)
+		return cmp;
+
+	/* compare paths on parameterization */
+	cmp |= path_compare_param_info(path1, path2);
+	if (cmp == PATHS_DIFFERENT)
+		return cmp;
+
+	/* Keep compatibility with the original decision tree from add_path */
+	if (cmp != PATHS_EQUAL)
+	{
+		/*
+		 * When paths are not deemed equal by the earlier dimensions we
+		 * compare them on, we treat both rowcount and parallel_safe as two
+		 * extra dimensions we can compare two paths on. To keep consistency
+		 * we call separated functions to compare both and merge to the final
+		 * result.
+		 */
+
+		cmp |= path_compare_rows(path1, path2);
+		if (cmp == PATHS_DIFFERENT)
+			return cmp;
+
+		cmp |= path_compare_parallel_safe(path1, path2);
+		if (cmp == PATHS_DIFFERENT)
+			return cmp;
+	}
+	else
+	{
+		/*
+		 * Only when all previous comparisons treat the paths equal we fall
+		 * back to the retained logics of choosing which path is better:
+		 *
+		 * Same pathkeys and outer rels, and fuzzily the same cost, so keep
+		 * just one; to decide which, first check parallel-safety, then rows,
+		 * then do a fuzzy cost comparison with very small fuzz limit.  (We
+		 * used to do an exact cost comparison, but that results in annoying
+		 * platform-specific plan variations due to roundoff in the cost
+		 * estimates.)	If things are still tied, arbitrarily keep only the
+		 * old path.  Notice that we will keep only the old path even if the
+		 * less-fuzzy comparison decides the startup and total costs compare
+		 * differently.
+		 */
+		if (path1->parallel_safe > path2->parallel_safe)
+			return PATHS_BETTER1;
+		else if (path1->parallel_safe < path2->parallel_safe)
+			return PATHS_BETTER2;
+		else if (path1->rows < path2->rows)
+			return PATHS_BETTER1;
+		else if (path1->rows > path2->rows)
+			return PATHS_BETTER2;
+		else if (compare_path_costs_fuzzily(path1,
+											path2,
+											1.0000000001) == PATHS_BETTER1)
+			return PATHS_BETTER1;
+		else
+			return PATHS_EQUAL;
+	}
+
+	return cmp;
+}
+
 /*
  * add_path_decision
  *    Takes a new_path and an old_path. Based on the paths it makes a decision
@@ -400,143 +593,21 @@ set_cheapest(RelOptInfo *parent_rel)
 static AddPathDecision
 add_path_decision(Path *new_path, Path *old_path)
 {
-	PathCostComparison costcmp;
-	PathKeysComparison keyscmp;
-	BMS_Comparison outercmp;
-
-	List	   *new_path_pathkeys;
-	List	   *old_path_pathkeys;
+	PathComparison cmp = path_compare(new_path, old_path);
 
-	/*
-	 * Do a fuzzy cost comparison with standard fuzziness limit.
-	 */
-	costcmp = compare_path_costs_fuzzily(new_path, old_path,
-										 STD_FUZZ_FACTOR);
-
-	/*
-	 * If the two paths compare differently for startup and total cost, then
-	 * we want to keep both, and we can skip comparing pathkeys and
-	 * required_outer rels.  If they compare the same, proceed with the other
-	 * comparisons.  Row count is checked last.  (We make the tests in this
-	 * order because the cost comparison is most likely to turn out
-	 * "different", and the pathkeys comparison next most likely.  As
-	 * explained above, row count very seldom makes a difference, so even
-	 * though it's cheap to compare there's not much point in checking it
-	 * earlier.)
-	 */
-	if (costcmp == COSTS_DIFFERENT)
-	{
-		return CONTINUE;
-	}
-
-	/* Pretend parameterized paths have no pathkeys, per comment above */
-	new_path_pathkeys = new_path->param_info ? NIL : new_path->pathkeys;
-	old_path_pathkeys = old_path->param_info ? NIL : old_path->pathkeys;
-	keyscmp = compare_pathkeys(new_path_pathkeys, old_path_pathkeys);
-	if (keyscmp == PATHKEYS_DIFFERENT)
+	switch (cmp)
 	{
-		return CONTINUE;
-	}
-
-	switch (costcmp)
-	{
-		case COSTS_EQUAL:
-			outercmp = bms_subset_compare(PATH_REQ_OUTER(new_path),
-										  PATH_REQ_OUTER(old_path));
-			if (keyscmp == PATHKEYS_BETTER1)
-			{
-				if ((outercmp == BMS_EQUAL ||
-					 outercmp == BMS_SUBSET1) &&
-					new_path->rows <= old_path->rows &&
-					new_path->parallel_safe >= old_path->parallel_safe)
-					return DOMINATES;	/* new dominates old */
-			}
-			else if (keyscmp == PATHKEYS_BETTER2)
-			{
-				if ((outercmp == BMS_EQUAL ||
-					 outercmp == BMS_SUBSET2) &&
-					new_path->rows >= old_path->rows &&
-					new_path->parallel_safe <= old_path->parallel_safe)
-					return REJECTED;	/* old dominates new */
-			}
-			else				/* keyscmp == PATHKEYS_EQUAL */
-			{
-				if (outercmp == BMS_EQUAL)
-				{
-					/*
-					 * Same pathkeys and outer rels, and fuzzily the same
-					 * cost, so keep just one; to decide which, first check
-					 * parallel-safety, then rows, then do a fuzzy cost
-					 * comparison with very small fuzz limit.  (We used to do
-					 * an exact cost comparison, but that results in annoying
-					 * platform-specific plan variations due to roundoff in
-					 * the cost estimates.)	If things are still tied,
-					 * arbitrarily keep only the old path.  Notice that we
-					 * will keep only the old path even if the less-fuzzy
-					 * comparison decides the startup and total costs compare
-					 * differently.
-					 */
-					if (new_path->parallel_safe >
-						old_path->parallel_safe)
-						return DOMINATES;	/* new dominates old */
-					else if (new_path->parallel_safe <
-							 old_path->parallel_safe)
-						return REJECTED;	/* old dominates new */
-					else if (new_path->rows < old_path->rows)
-						return DOMINATES;	/* new dominates old */
-					else if (new_path->rows > old_path->rows)
-						return REJECTED;	/* old dominates new */
-					else if (compare_path_costs_fuzzily(new_path,
-														old_path,
-														1.0000000001) == COSTS_BETTER1)
-						return DOMINATES;	/* new dominates old */
-					else
-						return REJECTED;	/* old equals or dominates new */
-				}
-				else if (outercmp == BMS_SUBSET1 &&
-						 new_path->rows <= old_path->rows &&
-						 new_path->parallel_safe >= old_path->parallel_safe)
-					return DOMINATES;	/* new dominates old */
-				else if (outercmp == BMS_SUBSET2 &&
-						 new_path->rows >= old_path->rows &&
-						 new_path->parallel_safe <= old_path->parallel_safe)
-					return REJECTED;	/* old dominates new */
-				/* else different parameterizations, keep both */
-			}
-			break;
-		case COSTS_BETTER1:
-			if (keyscmp != PATHKEYS_BETTER2)
-			{
-				outercmp = bms_subset_compare(PATH_REQ_OUTER(new_path),
-											  PATH_REQ_OUTER(old_path));
-				if ((outercmp == BMS_EQUAL ||
-					 outercmp == BMS_SUBSET1) &&
-					new_path->rows <= old_path->rows &&
-					new_path->parallel_safe >= old_path->parallel_safe)
-					return DOMINATES;	/* new dominates old */
-			}
-			break;
-		case COSTS_BETTER2:
-			if (keyscmp != PATHKEYS_BETTER1)
-			{
-				outercmp = bms_subset_compare(PATH_REQ_OUTER(new_path),
-											  PATH_REQ_OUTER(old_path));
-				if ((outercmp == BMS_EQUAL ||
-					 outercmp == BMS_SUBSET2) &&
-					new_path->rows >= old_path->rows &&
-					new_path->parallel_safe <= old_path->parallel_safe)
-					return REJECTED;	/* old dominates new */
-			}
-			break;
-		case COSTS_DIFFERENT:
-
-			/*
-			 * can't get here, but keep this case to keep compiler quiet
-			 */
-			break;
+		case PATHS_BETTER1:
+			return DOMINATES;
+		case PATHS_BETTER2:
+		case PATHS_EQUAL:		/* when paths are equal only keep the oldest */
+			return REJECTED;
+		case PATHS_DIFFERENT:
+			return CONTINUE;
+		default:
+			/* should not be reached */
+			return CONTINUE;
 	}
-
-	return CONTINUE;
 }
 
 /*
diff --git a/src/include/optimizer/pathnode.h b/src/include/optimizer/pathnode.h
index 2922c0cdc1..1ee4a91ab8 100644
--- a/src/include/optimizer/pathnode.h
+++ b/src/include/optimizer/pathnode.h
@@ -21,6 +21,14 @@
 /*
  * prototypes for pathnode.c
  */
+typedef enum
+{
+	PATHS_EQUAL = 0,			/* paths compare (potentially fuzzily) equal */
+	PATHS_BETTER1 = 1,			/* path1 is better on interesting dimensions */
+	PATHS_BETTER2 = 2,			/* path2 is better on interesting dimensions */
+	PATHS_DIFFERENT = 3			/* no path is better on all dimensions */
+} PathComparison;
+
 extern int	compare_path_costs(Path *path1, Path *path2,
 							   CostSelector criterion);
 extern int	compare_fractional_path_costs(Path *path1, Path *path2,
-- 
2.32.0 (Apple Git-132)