From 8634a6a99398cc8572e7848aeb956a4464312d76 Mon Sep 17 00:00:00 2001 From: Amul Sul Date: Tue, 10 Oct 2017 15:08:34 +0530 Subject: [PATCH 3/3] Enable partition wise join support v2 v2: Moved switch case from partition_bounds_copy() to 0001 patch. v1: -Few changes to enable partition wise join support for the hash partitioned table. -Added regression tests. --- src/backend/catalog/partition.c | 16 ++++-- src/backend/optimizer/path/joinrels.c | 10 +++- src/test/regress/expected/partition_join.out | 81 ++++++++++++++++++++++++++++ src/test/regress/sql/partition_join.sql | 32 +++++++++++ 4 files changed, 133 insertions(+), 6 deletions(-) diff --git a/src/backend/catalog/partition.c b/src/backend/catalog/partition.c index 1b894bd1be..61de13fb07 100644 --- a/src/backend/catalog/partition.c +++ b/src/backend/catalog/partition.c @@ -889,15 +889,23 @@ partition_bounds_copy(PartitionBoundInfo src, for (i = 0; i < ndatums; i++) { int j; - dest->datums[i] = (Datum *) palloc(sizeof(Datum) * partnatts); - for (j = 0; j < partnatts; j++) + /* + * For a corresponding to hash partition, datums array will have two + * elements - modulus and remainder. + */ + bool hash_part = (key->strategy == PARTITION_STRATEGY_HASH); + int dim = hash_part? 2 : partnatts; + + dest->datums[i] = (Datum *) palloc(sizeof(Datum) * dim); + + for (j = 0; j < dim; j++) { if (dest->kind == NULL || dest->kind[i][j] == PARTITION_RANGE_DATUM_VALUE) dest->datums[i][j] = datumCopy(src->datums[i][j], - key->parttypbyval[j], - key->parttyplen[j]); + hash_part? true : key->parttypbyval[j], + key->parttyplen[j]); } } diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c index 2b868c52de..5df861f9cf 100644 --- a/src/backend/optimizer/path/joinrels.c +++ b/src/backend/optimizer/path/joinrels.c @@ -1515,8 +1515,14 @@ have_partkey_equi_join(RelOptInfo *rel1, RelOptInfo *rel2, JoinType jointype, * The clause allows partition-wise join if only it uses the same * operator family as that specified by the partition key. */ - if (!list_member_oid(rinfo->mergeopfamilies, - part_scheme->partopfamily[ipk1])) + if (rel1->part_scheme->strategy == PARTITION_STRATEGY_HASH) + { + if (!op_in_opfamily(rinfo->hashjoinoperator, + part_scheme->partopfamily[ipk1])) + continue; + } + else if (!list_member_oid(rinfo->mergeopfamilies, + part_scheme->partopfamily[ipk1])) continue; /* Mark the partition key as having an equi-join clause. */ diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out index 234b8b5381..1c8cdb34fb 100644 --- a/src/test/regress/expected/partition_join.out +++ b/src/test/regress/expected/partition_join.out @@ -1256,6 +1256,87 @@ SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1 WHERE a = 1 AND a = 2) t1 One-Time Filter: false (14 rows) +-- +-- tests for hash partitioned tables. +-- +CREATE TABLE pht1 (a int, b int, c text) PARTITION BY HASH(c); +CREATE TABLE pht1_p1 PARTITION OF pht1 FOR VALUES WITH (MODULUS 3, REMAINDER 0); +CREATE TABLE pht1_p2 PARTITION OF pht1 FOR VALUES WITH (MODULUS 3, REMAINDER 1); +CREATE TABLE pht1_p3 PARTITION OF pht1 FOR VALUES WITH (MODULUS 3, REMAINDER 2); +INSERT INTO pht1 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(0, 599, 2) i; +ANALYZE pht1; +CREATE TABLE pht2 (a int, b int, c text) PARTITION BY HASH(c); +CREATE TABLE pht2_p1 PARTITION OF pht2 FOR VALUES WITH (MODULUS 3, REMAINDER 0); +CREATE TABLE pht2_p2 PARTITION OF pht2 FOR VALUES WITH (MODULUS 3, REMAINDER 1); +CREATE TABLE pht2_p3 PARTITION OF pht2 FOR VALUES WITH (MODULUS 3, REMAINDER 2); +INSERT INTO pht2 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(0, 599, 3) i; +ANALYZE pht2; +-- +-- hash partitioned by expression +-- +CREATE TABLE pht1_e (a int, b int, c text) PARTITION BY HASH(ltrim(c, 'A')); +CREATE TABLE pht1_e_p1 PARTITION OF pht1_e FOR VALUES WITH (MODULUS 3, REMAINDER 0); +CREATE TABLE pht1_e_p2 PARTITION OF pht1_e FOR VALUES WITH (MODULUS 3, REMAINDER 1); +CREATE TABLE pht1_e_p3 PARTITION OF pht1_e FOR VALUES WITH (MODULUS 3, REMAINDER 2); +INSERT INTO pht1_e SELECT i, i, 'A' || to_char(i/50, 'FM0000') FROM generate_series(0, 599, 2) i; +ANALYZE pht1_e; +-- test partition matching with N-way join +EXPLAIN (COSTS OFF) +SELECT avg(t1.a), avg(t2.b), avg(t3.a + t3.b), t1.c, t2.c, t3.c FROM pht1 t1, pht2 t2, pht1_e t3 WHERE t1.c = t2.c AND ltrim(t3.c, 'A') = t1.c GROUP BY t1.c, t2.c, t3.c ORDER BY t1.c, t2.c, t3.c; + QUERY PLAN +-------------------------------------------------------------------------------------- + Sort + Sort Key: t1.c, t3.c + -> HashAggregate + Group Key: t1.c, t2.c, t3.c + -> Result + -> Append + -> Hash Join + Hash Cond: (t1.c = t2.c) + -> Seq Scan on pht1_p1 t1 + -> Hash + -> Hash Join + Hash Cond: (t2.c = ltrim(t3.c, 'A'::text)) + -> Seq Scan on pht2_p1 t2 + -> Hash + -> Seq Scan on pht1_e_p1 t3 + -> Hash Join + Hash Cond: (t1_1.c = t2_1.c) + -> Seq Scan on pht1_p2 t1_1 + -> Hash + -> Hash Join + Hash Cond: (t2_1.c = ltrim(t3_1.c, 'A'::text)) + -> Seq Scan on pht2_p2 t2_1 + -> Hash + -> Seq Scan on pht1_e_p2 t3_1 + -> Hash Join + Hash Cond: (t1_2.c = t2_2.c) + -> Seq Scan on pht1_p3 t1_2 + -> Hash + -> Hash Join + Hash Cond: (t2_2.c = ltrim(t3_2.c, 'A'::text)) + -> Seq Scan on pht2_p3 t2_2 + -> Hash + -> Seq Scan on pht1_e_p3 t3_2 +(33 rows) + +SELECT avg(t1.a), avg(t2.b), avg(t3.a + t3.b), t1.c, t2.c, t3.c FROM pht1 t1, pht2 t2, pht1_e t3 WHERE t1.c = t2.c AND ltrim(t3.c, 'A') = t1.c GROUP BY t1.c, t2.c, t3.c ORDER BY t1.c, t2.c, t3.c; + avg | avg | avg | c | c | c +----------------------+----------------------+-----------------------+------+------+------- + 24.0000000000000000 | 24.0000000000000000 | 48.0000000000000000 | 0000 | 0000 | A0000 + 74.0000000000000000 | 75.0000000000000000 | 148.0000000000000000 | 0001 | 0001 | A0001 + 124.0000000000000000 | 124.5000000000000000 | 248.0000000000000000 | 0002 | 0002 | A0002 + 174.0000000000000000 | 174.0000000000000000 | 348.0000000000000000 | 0003 | 0003 | A0003 + 224.0000000000000000 | 225.0000000000000000 | 448.0000000000000000 | 0004 | 0004 | A0004 + 274.0000000000000000 | 274.5000000000000000 | 548.0000000000000000 | 0005 | 0005 | A0005 + 324.0000000000000000 | 324.0000000000000000 | 648.0000000000000000 | 0006 | 0006 | A0006 + 374.0000000000000000 | 375.0000000000000000 | 748.0000000000000000 | 0007 | 0007 | A0007 + 424.0000000000000000 | 424.5000000000000000 | 848.0000000000000000 | 0008 | 0008 | A0008 + 474.0000000000000000 | 474.0000000000000000 | 948.0000000000000000 | 0009 | 0009 | A0009 + 524.0000000000000000 | 525.0000000000000000 | 1048.0000000000000000 | 0010 | 0010 | A0010 + 574.0000000000000000 | 574.5000000000000000 | 1148.0000000000000000 | 0011 | 0011 | A0011 +(12 rows) + -- -- multiple levels of partitioning -- diff --git a/src/test/regress/sql/partition_join.sql b/src/test/regress/sql/partition_join.sql index ca525d9941..2316bbdcb8 100644 --- a/src/test/regress/sql/partition_join.sql +++ b/src/test/regress/sql/partition_join.sql @@ -229,6 +229,38 @@ SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1 WHERE a = 1 AND a = 2) t1 EXPLAIN (COSTS OFF) SELECT t1.a, t1.c, t2.b, t2.c FROM (SELECT * FROM prt1 WHERE a = 1 AND a = 2) t1 FULL JOIN prt2 t2 ON t1.a = t2.b WHERE t2.a = 0 ORDER BY t1.a, t2.b; +-- +-- tests for hash partitioned tables. +-- +CREATE TABLE pht1 (a int, b int, c text) PARTITION BY HASH(c); +CREATE TABLE pht1_p1 PARTITION OF pht1 FOR VALUES WITH (MODULUS 3, REMAINDER 0); +CREATE TABLE pht1_p2 PARTITION OF pht1 FOR VALUES WITH (MODULUS 3, REMAINDER 1); +CREATE TABLE pht1_p3 PARTITION OF pht1 FOR VALUES WITH (MODULUS 3, REMAINDER 2); +INSERT INTO pht1 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(0, 599, 2) i; +ANALYZE pht1; + +CREATE TABLE pht2 (a int, b int, c text) PARTITION BY HASH(c); +CREATE TABLE pht2_p1 PARTITION OF pht2 FOR VALUES WITH (MODULUS 3, REMAINDER 0); +CREATE TABLE pht2_p2 PARTITION OF pht2 FOR VALUES WITH (MODULUS 3, REMAINDER 1); +CREATE TABLE pht2_p3 PARTITION OF pht2 FOR VALUES WITH (MODULUS 3, REMAINDER 2); +INSERT INTO pht2 SELECT i, i, to_char(i/50, 'FM0000') FROM generate_series(0, 599, 3) i; +ANALYZE pht2; + +-- +-- hash partitioned by expression +-- +CREATE TABLE pht1_e (a int, b int, c text) PARTITION BY HASH(ltrim(c, 'A')); +CREATE TABLE pht1_e_p1 PARTITION OF pht1_e FOR VALUES WITH (MODULUS 3, REMAINDER 0); +CREATE TABLE pht1_e_p2 PARTITION OF pht1_e FOR VALUES WITH (MODULUS 3, REMAINDER 1); +CREATE TABLE pht1_e_p3 PARTITION OF pht1_e FOR VALUES WITH (MODULUS 3, REMAINDER 2); +INSERT INTO pht1_e SELECT i, i, 'A' || to_char(i/50, 'FM0000') FROM generate_series(0, 599, 2) i; +ANALYZE pht1_e; + +-- test partition matching with N-way join +EXPLAIN (COSTS OFF) +SELECT avg(t1.a), avg(t2.b), avg(t3.a + t3.b), t1.c, t2.c, t3.c FROM pht1 t1, pht2 t2, pht1_e t3 WHERE t1.c = t2.c AND ltrim(t3.c, 'A') = t1.c GROUP BY t1.c, t2.c, t3.c ORDER BY t1.c, t2.c, t3.c; +SELECT avg(t1.a), avg(t2.b), avg(t3.a + t3.b), t1.c, t2.c, t3.c FROM pht1 t1, pht2 t2, pht1_e t3 WHERE t1.c = t2.c AND ltrim(t3.c, 'A') = t1.c GROUP BY t1.c, t2.c, t3.c ORDER BY t1.c, t2.c, t3.c; + -- -- multiple levels of partitioning -- -- 2.14.1