From 11f93f75f05315404d5838ff050b0d66d1633a78 Mon Sep 17 00:00:00 2001 From: Etsuro Fujita Date: Sat, 4 Apr 2020 00:05:59 +0900 Subject: [PATCH 2/2] Changes. --- doc/src/sgml/config.sgml | 6 +- src/backend/nodes/outfuncs.c | 2 +- src/backend/optimizer/README | 27 +++++ src/backend/optimizer/path/joinrels.c | 183 +++++++++++++++++++--------------- src/backend/optimizer/util/relnode.c | 10 +- src/backend/partitioning/partbounds.c | 86 ++++++++++------ src/include/nodes/pathnodes.h | 11 +- 7 files changed, 198 insertions(+), 127 deletions(-) diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index 2de21903a1..50cffb8694 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -4693,9 +4693,9 @@ ANY num_sync ( part_scheme == rel1->part_scheme && joinrel->part_scheme == rel2->part_scheme); - /* - * If we don't have the partition bounds for the join rel yet, try to - * compute those along with pairs of partitions to be joined. - */ - if (joinrel->nparts == -1) - { - PartitionScheme part_scheme = joinrel->part_scheme; - PartitionBoundInfo boundinfo = NULL; - int nparts = 0; - - Assert(joinrel->boundinfo == NULL); - Assert(joinrel->part_rels == NULL); - - /* - * See if the partition bounds for inputs are exactly the same, in - * which case we don't need to work hard: the join rel have the same - * partition bounds as inputs, and the partitions with the same - * cardinal positions form the pairs. - * - * Note: even in cases where one or both inputs have merged bounds, - * it would be possible for both the bounds to be exactly the same, but - * it seems unlikely to be worth the cycles to check. - */ - if (!rel1->merged && - !rel2->merged && - rel1->nparts == rel2->nparts && - partition_bounds_equal(part_scheme->partnatts, - part_scheme->parttyplen, - part_scheme->parttypbyval, - rel1->boundinfo, rel2->boundinfo)) - { - boundinfo = rel1->boundinfo; - nparts = rel1->nparts; - } - else - { - /* Try merging the partition bounds for inputs. 
*/ - boundinfo = partition_bounds_merge(part_scheme->partnatts, - part_scheme->partsupfunc, - part_scheme->partcollation, - rel1, rel2, - parent_sjinfo->jointype, - &parts1, &parts2); - if (boundinfo == NULL) - { - joinrel->nparts = 0; - return; - } - nparts = list_length(parts1); - merged = true; - } - - Assert(nparts > 0); - joinrel->boundinfo = boundinfo; - joinrel->merged = merged; - joinrel->nparts = nparts; - joinrel->part_rels = - (RelOptInfo **) palloc0(sizeof(RelOptInfo *) * nparts); - } - else - { - Assert(joinrel->nparts > 0); - Assert(joinrel->boundinfo); - Assert(joinrel->part_rels); + Assert(!(joinrel->partbounds_merged && (joinrel->nparts <= 0))); - /* - * If the join rel's merged flag is true, it means inputs are not - * guaranteed to have the same partition bounds, therefore we can't - * assume that the partitions at the same cardinal positions form the - * pairs; let get_matching_part_pairs() generate the pairs. Otherwise, - * nothing to do since we can assume that. - */ - if (joinrel->merged) - { - get_matching_part_pairs(root, joinrel, rel1, rel2, - &parts1, &parts2); - Assert(list_length(parts1) == joinrel->nparts); - Assert(list_length(parts2) == joinrel->nparts); - merged = true; - } - } + compute_partition_bounds(root, rel1, rel2, joinrel, parent_sjinfo, + &parts1, &parts2); - if (merged) + if (joinrel->partbounds_merged) { lcr1 = list_head(parts1); lcr2 = list_head(parts2); @@ -1503,7 +1429,7 @@ try_partitionwise_join(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2, AppendRelInfo **appinfos; int nappinfos; - if (merged) + if (joinrel->partbounds_merged) { child_rel1 = lfirst_node(RelOptInfo, lcr1); child_rel2 = lfirst_node(RelOptInfo, lcr2); @@ -1835,6 +1761,97 @@ match_expr_to_partition_keys(Expr *expr, RelOptInfo *rel, bool strict_op) return -1; } +/* + * compute_partition_bounds + * Compute the partition bounds for a join rel from those for inputs + */ +static void +compute_partition_bounds(PlannerInfo *root, RelOptInfo *rel1, + 
RelOptInfo *rel2, RelOptInfo *joinrel, + SpecialJoinInfo *parent_sjinfo, + List **parts1, List **parts2) +{ + /* + * If we don't have the partition bounds for the join rel yet, try to + * compute those along with pairs of partitions to be joined. + */ + if (joinrel->nparts == -1) + { + PartitionScheme part_scheme = joinrel->part_scheme; + PartitionBoundInfo boundinfo = NULL; + int nparts = 0; + + Assert(joinrel->boundinfo == NULL); + Assert(joinrel->part_rels == NULL); + + /* + * See if the partition bounds for inputs are exactly the same, in + * which case we don't need to work hard: the join rel has the same + * partition bounds as inputs, and the partitions with the same + * cardinal positions form the pairs. + * + * Note: even in cases where one or both inputs have merged bounds, + * it would be possible for both the bounds to be exactly the same, but + * it seems unlikely to be worth the cycles to check. + */ + if (!rel1->partbounds_merged && + !rel2->partbounds_merged && + rel1->nparts == rel2->nparts && + partition_bounds_equal(part_scheme->partnatts, + part_scheme->parttyplen, + part_scheme->parttypbyval, + rel1->boundinfo, rel2->boundinfo)) + { + boundinfo = rel1->boundinfo; + nparts = rel1->nparts; + } + else + { + /* Try merging the partition bounds for inputs. 
*/ + boundinfo = partition_bounds_merge(part_scheme->partnatts, + part_scheme->partsupfunc, + part_scheme->partcollation, + rel1, rel2, + parent_sjinfo->jointype, + parts1, parts2); + if (boundinfo == NULL) + { + joinrel->nparts = 0; + return; + } + nparts = list_length(*parts1); + joinrel->partbounds_merged = true; + } + + Assert(nparts > 0); + joinrel->boundinfo = boundinfo; + joinrel->nparts = nparts; + joinrel->part_rels = + (RelOptInfo **) palloc0(sizeof(RelOptInfo *) * nparts); + } + else + { + Assert(joinrel->nparts > 0); + Assert(joinrel->boundinfo); + Assert(joinrel->part_rels); + + /* + * If the join rel's partbounds_merged flag is true, it means inputs + * are not guaranteed to have the same partition bounds, therefore we + * can't assume that the partitions at the same cardinal positions form + * the pairs; let get_matching_part_pairs() generate the pairs. + * Otherwise, nothing to do since we can assume that. + */ + if (joinrel->partbounds_merged) + { + get_matching_part_pairs(root, joinrel, rel1, rel2, + parts1, parts2); + Assert(list_length(*parts1) == joinrel->nparts); + Assert(list_length(*parts2) == joinrel->nparts); + } + } +} + /* * get_matching_part_pairs * Generate pairs of partitions to be joined from the two inputs diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c index 0e4944ac8e..433f031d0f 100644 --- a/src/backend/optimizer/util/relnode.c +++ b/src/backend/optimizer/util/relnode.c @@ -242,7 +242,7 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent) rel->part_scheme = NULL; rel->nparts = -1; rel->boundinfo = NULL; - rel->merged = false; + rel->partbounds_merged = false; rel->partition_qual = NIL; rel->part_rels = NULL; rel->all_partrels = NULL; @@ -657,7 +657,7 @@ build_join_rel(PlannerInfo *root, joinrel->part_scheme = NULL; joinrel->nparts = -1; joinrel->boundinfo = NULL; - joinrel->merged = false; + joinrel->partbounds_merged = false; joinrel->partition_qual = NIL; 
joinrel->part_rels = NULL; joinrel->all_partrels = NULL; @@ -835,7 +835,7 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel, joinrel->part_scheme = NULL; joinrel->nparts = -1; joinrel->boundinfo = NULL; - joinrel->merged = false; + joinrel->partbounds_merged = false; joinrel->partition_qual = NIL; joinrel->part_rels = NULL; joinrel->all_partrels = NULL; @@ -1668,8 +1668,8 @@ build_joinrel_partition_info(RelOptInfo *joinrel, RelOptInfo *outer_rel, !joinrel->boundinfo); /* - * If the join relation is partitioned, it use the same partitioning scheme - * as the joining relations. + * If the join relation is partitioned, it uses the same partitioning + * scheme as the joining relations. * * Note: we calculate the partition bounds, number of partitions, and * child-join relations of the join relation in try_partitionwise_join(). diff --git a/src/backend/partitioning/partbounds.c b/src/backend/partitioning/partbounds.c index 24dbc2c8f3..e4c74d6c03 100644 --- a/src/backend/partitioning/partbounds.c +++ b/src/backend/partitioning/partbounds.c @@ -69,7 +69,10 @@ typedef struct PartitionRangeBound bool lower; /* this is the lower (vs upper) bound */ } PartitionRangeBound; -/* Per-partitioned-relation data for merge_list_bounds()/merge_range_bounds() */ +/* + * Mapping from partitions of a partitioned relation to partitions of a join + * relation supposed to be partitioned (a.k.a merged partitions) + */ typedef struct PartitionMap { int nparts; /* number of partitions */ @@ -998,16 +1001,18 @@ partition_bounds_copy(PartitionBoundInfo src, /* * partition_bounds_merge + * Check to see if there is a relationship where each partition of + * 'outer_rel' matches/overlaps at most one partition of 'inner_rel', and + * vice versa; and if so, build and return the partition bounds for a join + * relation between the rels, generating two lists of matching/overlapping + * partitions, which are returned to *outer_parts and *inner_parts + * respectively. 
* - * This function builds and returns the partition bounds for a join relation - * between input relations, creating two lists of partitions, which are - * returned to *outer_parts and *inner_parts respectively. The lists contain - * the same number of partitions, and the partitions at the same positions in - * the lists indicate join pairs used for partitioned join. - * - * This function returns NULL, setting *outer_parts and *inner_parts to NIL, - * if a partition on one side matches multiple partitions on the other side, - * in which case we currently don't support partitioned join. + * The lists contain the same number of partitions, and the partitions at the + * same positions in the lists indicate join pairs used for partitioned join. + * If a partition on one side matches/overlaps multiple partitions on the other + * side, this function returns NULL, setting *outer_parts and *inner_parts to + * NIL. */ PartitionBoundInfo partition_bounds_merge(int partnatts, @@ -1018,24 +1023,20 @@ partition_bounds_merge(int partnatts, { PartitionBoundInfo outer_binfo = outer_rel->boundinfo; PartitionBoundInfo inner_binfo = inner_rel->boundinfo; - char strategy; /* * Currently, this function is called only from try_partitionwise_join(), * so the join type should be INNER, LEFT, FULL, SEMI, or ANTI. */ - if (jointype != JOIN_INNER && jointype != JOIN_LEFT && - jointype != JOIN_FULL && jointype != JOIN_SEMI && - jointype != JOIN_ANTI) - elog(ERROR, "unrecognized join type: %d", (int) jointype); + Assert(jointype == JOIN_INNER || jointype == JOIN_LEFT || + jointype == JOIN_FULL || jointype == JOIN_SEMI || + jointype == JOIN_ANTI); - /* Bail out if the partitioning strategies are different. */ - if (outer_binfo->strategy != inner_binfo->strategy) - return NULL; + /* The partitioning strategies should be the same. 
*/ + Assert(outer_binfo->strategy == inner_binfo->strategy); - strategy = outer_binfo->strategy; *outer_parts = *inner_parts = NIL; - switch (strategy) + switch (outer_binfo->strategy) { case PARTITION_STRATEGY_HASH: @@ -1075,7 +1076,8 @@ partition_bounds_merge(int partnatts, inner_parts); default: - elog(ERROR, "unexpected partition strategy: %d", (int) strategy); + elog(ERROR, "unexpected partition strategy: %d", + (int) outer_binfo->strategy); return NULL; /* keep compiler quiet */ } } @@ -1084,6 +1086,18 @@ partition_bounds_merge(int partnatts, * merge_list_bounds * Create the partition bounds for a join relation between list * partitioned tables, if possible + * + * In this function we try to find matching partitions from both sides by + * comparing list values stored in their partition bounds. Since the list + * values appear in the ascending order, an algorithm similar to merge join is + * used for that. If a partition doesn't have a matching partition on the + * other side, the algorithm tries to match it with the default partition on + * the other side if any; if not, the algorithm tries to match it with a + * dummy partition on the other side if it is on the non-nullable side of an + * outer join. Also, if both sides have the default partitions, the algorithm + * tries to match them with each other. We give up if the algorithm finds a + * partition matching multiple partitions on the other side, which is the + * scenario the current implementation of partitioned join can't handle. */ static PartitionBoundInfo merge_list_bounds(FmgrInfo *partsupfunc, Oid *partcollation, @@ -1379,6 +1393,18 @@ cleanup: * merge_range_bounds * Create the partition bounds for a join relation between range * partitioned tables, if possible + * + * In this function we try to find overlapping partitions from both sides by + * comparing ranges stored in their partition bounds. 
Since the ranges + * appear in the ascending order, an algorithm similar to merge join is + * used for that. If a partition doesn't have an overlapping partition on the + * other side, the algorithm tries to match it with the default partition on + * the other side if any; if not, the algorithm tries to match it with a + * dummy partition on the other side if it is on the non-nullable side of an + * outer join. Also, if both sides have the default partitions, the algorithm + * tries to match them with each other. We give up if the algorithm finds a + * partition overlapping multiple partitions on the other side, which is the + * scenario the current implementation of partitioned join can't handle. */ static PartitionBoundInfo merge_range_bounds(int partnatts, FmgrInfo *partsupfuncs, @@ -1851,8 +1877,8 @@ merge_matching_partitions(PartitionMap *outer_map, PartitionMap *inner_map, * index of the merged partition if successful, -1 otherwise * * If the partition is newly created, *next_index is incremented. Also, if it - * is the default partition of the join relation, *default_partition is set to - * the index if not already done. + * is the default partition of the join relation, *default_index is set to the + * index if not already done. */ static int process_outer_partition(PartitionMap *outer_map, @@ -1901,7 +1927,7 @@ process_outer_partition(PartitionMap *outer_map, * has to be scanned all the way anyway, so the resulting partition * will contain all key values from the default partition, which any * other partition of the join relation will not contain. Thus the - * resutling partition will act as the default partition of the join + * resulting partition will act as the default partition of the join * relation; record the index in *default_index if not already done. 
*/ if (jointype == JOIN_FULL) @@ -1932,8 +1958,8 @@ process_outer_partition(PartitionMap *outer_map, * index of the merged partition if successful, -1 otherwise * * If the partition is newly created, *next_index is incremented. Also, if it - * is the default partition of the join relation, *default_partition is set to - * the index if not already done. + * is the default partition of the join relation, *default_index is set to the + * index if not already done. */ static int process_inner_partition(PartitionMap *outer_map, @@ -1982,7 +2008,7 @@ process_inner_partition(PartitionMap *outer_map, * has to be scanned all the way anyway, so the resulting partition * will contain all key values from the default partition, which any * other partition of the join relation will not contain. Thus the - * resutling partition will act as the default partition of the join + * resulting partition will act as the default partition of the join * relation; record the index in *default_index if not already done. */ if (IS_OUTER_JOIN(jointype)) @@ -2044,13 +2070,13 @@ merge_null_partitions(PartitionMap *outer_map, { Assert(outer_null >= 0 && outer_null < outer_map->nparts); if (outer_map->merged_indexes[outer_null] == -1) - consider_outer_null = true; + consider_outer_null = true; } if (inner_has_null) { Assert(inner_null >= 0 && inner_null < inner_map->nparts); if (inner_map->merged_indexes[inner_null] == -1) - consider_inner_null = true; + consider_inner_null = true; } /* If both flags are set false, we don't need to do anything. */ @@ -2235,7 +2261,7 @@ merge_default_partitions(PartitionMap *outer_map, * * Note: The caller assumes that the given partition doesn't have a non-dummy * matching partition on the other side, but if the given partition finds the - * matchig partition later, we will adjust the assignment. + * matching partition later, we will adjust the assignment. 
*/ static int merge_partition_with_dummy(PartitionMap *map, int index, int *next_index) diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h index 7f9c4ab1f1..0b1eb00223 100644 --- a/src/include/nodes/pathnodes.h +++ b/src/include/nodes/pathnodes.h @@ -579,7 +579,7 @@ typedef struct PartitionSchemeData *PartitionScheme; * part_scheme - Partitioning scheme of the relation * nparts - Number of partitions * boundinfo - Partition bounds - * merged - true if partition bounds are merged ones + * partbounds_merged - true if partition bounds are merged ones * partition_qual - Partition constraint if not the root * part_rels - RelOptInfos for each partition * all_partrels - Relids set of all partition relids @@ -720,11 +720,12 @@ typedef struct RelOptInfo /* used for partitioned relations */ PartitionScheme part_scheme; /* Partitioning scheme. */ - int nparts; /* number of partitions; 0 = not partitioned; - * -1 = not yet set */ + int nparts; /* number of partitions; -1 if not yet set; + * in case of a join relation 0 means it's + * considered unpartitioned */ struct PartitionBoundInfoData *boundinfo; /* Partition bounds */ - bool merged; /* true if partition bounds were created by - * partition_bounds_merge() */ + bool partbounds_merged; /* true if partition bounds were created by + * partition_bounds_merge() */ List *partition_qual; /* partition constraint */ struct RelOptInfo **part_rels; /* Array of RelOptInfos of partitions, * stored in the same order of bounds */ -- 2.14.3 (Apple Git-98)