From 9a43cf0fb74e242f56c9d290c5544743b4b7375d Mon Sep 17 00:00:00 2001 From: Tomas Vondra Date: Tue, 9 Mar 2021 21:09:14 +0100 Subject: [PATCH 2/2] review --- src/backend/optimizer/path/costsize.c | 77 ++++++++++++++++++++------- src/backend/optimizer/path/pathkeys.c | 14 ++++- 2 files changed, 69 insertions(+), 22 deletions(-) diff --git a/src/backend/optimizer/path/costsize.c b/src/backend/optimizer/path/costsize.c index 1639258aaf..f55a4f20e5 100644 --- a/src/backend/optimizer/path/costsize.c +++ b/src/backend/optimizer/path/costsize.c @@ -1755,6 +1755,8 @@ cost_recursive_union(Path *runion, Path *nrterm, Path *rterm) * is_fake_var * Workaround for generate_append_tlist() which generates fake Vars with * varno == 0, that will cause a fail of estimate_num_group() call + * + * XXX Ummm, why would estimate_num_group fail with this? */ static bool is_fake_var(Expr *expr) @@ -1828,27 +1830,53 @@ get_width_cost_multiplier(PlannerInfo *root, Expr *expr) * compute_cpu_sort_cost * compute CPU cost of sort (i.e. in-memory) * + * The main thing we need to calculate to estimate sort CPU costs is the number + * of calls to the comparator functions. The difficulty is that for multi-column + * sorts there may be different data types involved (for some of which the calls + * may be much more expensive). Furthermore, the columns may have very different + * number of distinct values - the higher the number, the fewer comparisons will + * be needed for the following columns. + * + * The algoritm is incremental - we add pathkeys one by one, and at each step we + * estimate the number of necessary comparisons (based on the number of distinct + * groups in the current pathkey prefix and the new pathkey), and the comparison + * costs (which is data type specific). 
+ * + * Estimation of the number of comparisons is based on ideas from two sources: + * + * 1) "Algorithms" (course), Robert Sedgewick, Kevin Wayne [https://algs4.cs.princeton.edu/home/] + * + * 2) "Quicksort Is Optimal For Many Equal Keys" (paper), Sebastian Wild, + * arXiv:1608.04906v4 [cs.DS] 1 Nov 2017. [https://arxiv.org/abs/1608.04906] + * + * In terms of that paper, let N - number of tuples, Xi - number of tuples with + * key Ki, then the estimate of number of comparisons is: + * + * log(N! / (X1! * X2! * ..)) ~ sum(Xi * log(N/Xi)) + * + * In our case all Xi are the same because now we don't have any estimation of + * group sizes, we only know the estimate of the number of groups (distinct + * values). In that case, the formula becomes: + * + * N * log(NumberOfGroups) + * + * For multi-column sorts we need to estimate the number of comparisons for + * each individual column - for example with columns (c1, c2, ..., ck) we + * can estimate that the number of comparisons on ck is roughly + * + * ncomparisons(c1, c2, ..., ck) / ncomparisons(c1, c2, ..., c(k-1)) + * + * Let k be a column number, Gk - number of groups defined by k columns, and Fk + * the cost of one comparison on column k; then the total cost is + * + * N * sum( Fk * log(Gk) ) + * + * Note: We also consider column width, not just the comparator cost. + * + * NOTE: some callers currently pass NIL for pathkeys because they + * can't conveniently supply the sort keys. In this case, it will fall back to + * a simple comparison cost estimate. - * - * Estimation algorithm is based on ideas from course Algorithms, - * Robert Sedgewick, Kevin Wayne, https://algs4.cs.princeton.edu/home/ and paper - * "Quicksort Is Optimal For Many Equal Keys", Sebastian Wild, - * arXiv:1608.04906v4 [cs.DS] 1 Nov 2017. - * - * In term of that papers, let N - number of tuples, Xi - number of tuples with - * key Ki, then estimation is: - * log(N! / (X1! * X2!
* ..)) ~ sum(Xi * log(N/Xi)) - * In our case all Xi are the same because now we don't have an estimation of - * group sizes, we have only estimation of number of groups. In this case, - * formula becomes: N * log(NumberOfGroups). Next, to support correct estimation - * of multi-column sort we need separately compute each column, so, let k is a - * column number, Gk - number of groups defined by k columns: - * N * sum( Fk * log(Gk) ) - * Fk is a function costs (including width) for k columns. */ - static Cost compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys, Cost comparison_cost, double tuples, double output_tuples, @@ -1862,7 +1890,7 @@ compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys, bool has_fake_var = false; int i = 0; Oid prev_datatype = InvalidOid; - Cost funcCost = 0.; + Cost funcCost = 0.0; List *cache_varinfos = NIL; /* fallback if pathkeys is unknown */ @@ -1873,6 +1901,10 @@ compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys, * a total number of tuple comparisons of N log2 K; but the constant * factor is a bit higher than for quicksort. Tweak it so that the * cost curve is continuous at the crossover point. + * + * XXX I suppose the "quicksort factor" references to 1.5 at the end + * of this function, but I'm not sure. I suggest we introduce some simple + * constants for that, instead of magic values. */ output_tuples = (heapSort) ? 
2.0 * output_tuples : tuples; per_tuple_cost += 2.0 * cpu_operator_cost * LOG2(output_tuples); @@ -1888,7 +1920,6 @@ compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys, * - per column comparison function cost * - we try to compute needed number of comparison per column */ - foreach(lc, pathkeys) { PathKey *pathkey = (PathKey*)lfirst(lc); @@ -1952,6 +1983,11 @@ compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys, * Don't use DEFAULT_NUM_DISTINCT because it used for only one * column while here we try to estimate number of groups over * set of columns. + * + * XXX Perhaps this should use DEFAULT_NUM_DISTINCT at least to + * limit the calculated values, somehow? + * + * XXX What's the logic of the following formula? */ nGroups = ceil(2.0 + sqrt(tuples) * list_length(pathkeyExprs) / list_length(pathkeys)); @@ -1968,6 +2004,7 @@ compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys, { if (tuplesPerPrevGroup < output_tuples) /* comparing only inside output_tuples */ + /* XXX why not to use the same multiplier (1.5)? */ correctedNGroups = ceil(2.0 * output_tuples / (tuplesPerPrevGroup / nGroups)); else @@ -1993,7 +2030,7 @@ compute_cpu_sort_cost(PlannerInfo *root, List *pathkeys, int nPresortedKeys, tuplesPerPrevGroup = ceil(1.5 * tuplesPerPrevGroup / nGroups); /* - * We could skip all followed columns for cost estimation, because we + * We could skip all following columns for cost estimation, because we * believe that tuples are unique by set ot previous columns */ if (tuplesPerPrevGroup <= 1.0) diff --git a/src/backend/optimizer/path/pathkeys.c b/src/backend/optimizer/path/pathkeys.c index 7beb32488a..b092c3e055 100644 --- a/src/backend/optimizer/path/pathkeys.c +++ b/src/backend/optimizer/path/pathkeys.c @@ -515,10 +515,19 @@ pathkey_sort_cost_comparator(const void *_a, const void *_b) return 0; return 1; } + /* * Order tail of list of group pathkeys by uniqueness descendetly. 
It allows to * speedup sorting. Returns newly allocated lists, old ones stay untouched. * n_preordered defines a head of list which order should be prevented. + * + * XXX But we're not generating this only based on uniqueness (that's a bad + * term anyway, because we're using ndistinct estimates, not uniqueness). + * We're also using the comparator cost to calculate the expected sort cost, + * and optimize that. + * + * XXX This should explain how we generate the values - all permutations for + * up to 4 values, etc. */ void get_cheapest_group_keys_order(PlannerInfo *root, double nrows, @@ -597,8 +606,9 @@ get_cheapest_group_keys_order(PlannerInfo *root, double nrows, else { /* - * Since v13 list_free() can clean list elements so for original list not to be modified it should be copied to - * a new one which can then be cleaned safely if needed. + * Since v13, list_free() can free list elements, so for the original + * list not to be modified it should be copied to a new one, which can + * then be freed safely if needed. */ new_group_pathkeys = list_copy(*group_pathkeys); nToPermute = nFreeKeys; -- 2.29.2