From a7ace03974bb716e7869863e619209cbd7326a7c Mon Sep 17 00:00:00 2001 From: Andres Freund Date: Tue, 22 Nov 2016 20:16:14 -0800 Subject: [PATCH 3/3] Use different hash IVs for tuplehash tables in parallel workers. This avoids a higher likelihood of ending up with an "unbalanced" table when several hashtables built by parallel workers are scanned and combined into a bigger one. --- src/backend/executor/execGrouping.c | 17 ++++++++++++++++- src/include/nodes/execnodes.h | 1 + 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/src/backend/executor/execGrouping.c b/src/backend/executor/execGrouping.c index 94cc59d..a089ccd 100644 --- a/src/backend/executor/execGrouping.c +++ b/src/backend/executor/execGrouping.c @@ -18,6 +18,8 @@ */ #include "postgres.h" +#include "access/hash.h" +#include "access/parallel.h" #include "executor/executor.h" #include "miscadmin.h" #include "utils/lsyscache.h" @@ -314,6 +316,19 @@ BuildTupleHashTable(int numCols, AttrNumber *keyColIdx, hashtable->in_hash_funcs = NULL; hashtable->cur_eq_funcs = NULL; + /* + * If parallelism is in use, even if the master backend is performing the + * scan itself, we don't want to create the hashtable exactly the same way + * in all workers. As hashtables are iterated over in keyspace-order, + * doing so in all processes in the same way is likely to lead to + * "unbalanced" hashtables when the table size initially is + * underestimated. 
+ */ + if (PerformingParallelWork()) + hashtable->hash_iv = hash_uint32(ParallelWorkerNumber); + else + hashtable->hash_iv = 0; + hashtable->hashtab = tuplehash_create(tablecxt, nbuckets); hashtable->hashtab->private_data = hashtable; @@ -450,7 +465,7 @@ TupleHashTableHash(struct tuplehash_hash *tb, const MinimalTuple tuple) TupleHashTable hashtable = (TupleHashTable) tb->private_data; int numCols = hashtable->numCols; AttrNumber *keyColIdx = hashtable->keyColIdx; - uint32 hashkey = 0; + uint32 hashkey = hashtable->hash_iv; TupleTableSlot *slot; FmgrInfo *hashfunctions; int i; diff --git a/src/include/nodes/execnodes.h b/src/include/nodes/execnodes.h index f6f73f3..7431352 100644 --- a/src/include/nodes/execnodes.h +++ b/src/include/nodes/execnodes.h @@ -528,6 +528,7 @@ typedef struct TupleHashTableData TupleTableSlot *inputslot; /* current input tuple's slot */ FmgrInfo *in_hash_funcs; /* hash functions for input datatype(s) */ FmgrInfo *cur_eq_funcs; /* equality functions for input vs. table */ + uint32 hash_iv; /* hash-function IV */ } TupleHashTableData; typedef tuplehash_iterator TupleHashIterator; -- 2.10.2