diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index 781a736..ee3fffb 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -1900,18 +1900,20 @@ show_hash_info(HashState *hashstate, ExplainState *es)
 	if (es->format != EXPLAIN_FORMAT_TEXT)
 	{
 		ExplainPropertyLong("Hash Buckets", hashtable->nbuckets, es);
+		ExplainPropertyLong("Original Hash Buckets",
+							hashtable->nbuckets_original, es);
 		ExplainPropertyLong("Hash Batches", hashtable->nbatch, es);
 		ExplainPropertyLong("Original Hash Batches",
 							hashtable->nbatch_original, es);
 		ExplainPropertyLong("Peak Memory Usage", spacePeakKb, es);
 	}
-	else if (hashtable->nbatch_original != hashtable->nbatch)
+	else if ((hashtable->nbatch_original != hashtable->nbatch) || (hashtable->nbuckets_original != hashtable->nbuckets))
 	{
 		appendStringInfoSpaces(es->str, es->indent * 2);
 		appendStringInfo(es->str,
-						 "Buckets: %d  Batches: %d (originally %d)  Memory Usage: %ldkB\n",
-						 hashtable->nbuckets, hashtable->nbatch,
-						 hashtable->nbatch_original, spacePeakKb);
+						 "Buckets: %d (originally %d)  Batches: %d (originally %d)  Memory Usage: %ldkB\n",
+						 hashtable->nbuckets, hashtable->nbuckets_original,
+						 hashtable->nbatch, hashtable->nbatch_original, spacePeakKb);
 	}
 	else
 	{
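With the text-format change above, a hash join that grew either its bucket count or its batch count at run time now reports both the final and the original values. An illustrative line (numbers invented, not from a real run):

    Buckets: 131072 (originally 65536)  Batches: 8 (originally 4)  Memory Usage: 8214kB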
diff --git a/src/backend/executor/nodeHash.c b/src/backend/executor/nodeHash.c
index 6a9d956..014b6f1 100644
--- a/src/backend/executor/nodeHash.c
+++ b/src/backend/executor/nodeHash.c
@@ -37,8 +37,10 @@
 #include "utils/lsyscache.h"
 #include "utils/syscache.h"
 
+bool		enable_hash_resize = true;
 
 static void ExecHashIncreaseNumBatches(HashJoinTable hashtable);
+static void ExecHashIncreaseNumBuckets(HashJoinTable hashtable);
 static void ExecHashBuildSkewHash(HashJoinTable hashtable, Hash *node,
 					  int mcvsToUse);
 static void ExecHashSkewTableInsert(HashJoinTable hashtable,
@@ -49,6 +51,9 @@ static void ExecHashRemoveNextSkewBucket(HashJoinTable hashtable);
 
 static char * chunk_alloc(HashJoinTable hashtable, int tupleSize);
 
+/* Memory needed for optimal number of buckets. */
+#define BUCKETS_SPACE(htab)	((htab)->nbuckets_optimal * sizeof(void *))
+
 /* ----------------------------------------------------------------
  *		ExecHash
  *
@@ -117,6 +122,7 @@ MultiExecHash(HashState *node)
 			/* It's a skew tuple, so put it into that hash table */
 			ExecHashSkewTableInsert(hashtable, slot, hashvalue,
 									bucketNumber);
+			hashtable->skewTuples += 1;
 		}
 		else
 		{
@@ -127,6 +133,30 @@ MultiExecHash(HashState *node)
 		}
 	}
 
+	/*
+	 * If the average number of tuples per bucket is over the defined
+	 * threshold, increase the number of buckets to get below it.
+	 */
+	if (enable_hash_resize)
+	{
+		/*
+		 * Do we actually need to resize? Only scale up, don't scale down
+		 * (for example after adding a batch, it's possible that
+		 * nbuckets > nbuckets_optimal).
+		 */
+		if (hashtable->nbuckets < hashtable->nbuckets_optimal)
+		{
+#ifdef HJDEBUG
+			printf("Increasing nbuckets %d => %d\n",
+				   hashtable->nbuckets, hashtable->nbuckets_optimal);
+#endif
+			ExecHashIncreaseNumBuckets(hashtable);
+		}
+	}
+
+	/* Account for the buckets in spaceUsed (reported in EXPLAIN ANALYZE) */
+	hashtable->spaceUsed += BUCKETS_SPACE(hashtable);
+	if (hashtable->spaceUsed > hashtable->spacePeak)
+		hashtable->spacePeak = hashtable->spaceUsed;
+
 	/* must provide our own instrumentation support */
 	if (node->ps.instrument)
 		InstrStopNode(node->ps.instrument, hashtable->totalTuples);
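The resize rule is easier to see in isolation. Below is a minimal standalone sketch of the doubling logic behind nbuckets_optimal; the helper name choose_nbuckets, the main() driver and its constants are hypothetical, only the NTUP_PER_BUCKET load-factor test mirrors the patch:

    #include <limits.h>
    #include <stdio.h>

    #define NTUP_PER_BUCKET 1	/* target load factor, as in the patch */

    /* hypothetical helper: double the bucket count until the expected
     * number of tuples per bucket drops to NTUP_PER_BUCKET */
    static int
    choose_nbuckets(double ntuples, int nbuckets)
    {
        while ((ntuples / nbuckets > NTUP_PER_BUCKET) &&
               (nbuckets <= INT_MAX / 2))
            nbuckets *= 2;
        return nbuckets;
    }

    int
    main(void)
    {
        /* 1M tuples into 1024 initial buckets => prints 1048576 (2^20) */
        printf("%d\n", choose_nbuckets(1000000.0, 1024));
        return 0;
    }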
@@ -239,6 +269,7 @@ ExecHashTableCreate(Hash *node, List *hashOperators, bool keepNulls)
 	int			nbatch;
 	int			num_skew_mcvs;
 	int			log2_nbuckets;
+	int			log2_nbatch;
 	int			nkeys;
 	int			i;
 	ListCell   *ho;
@@ -263,6 +294,10 @@ ExecHashTableCreate(Hash *node, List *hashOperators, bool keepNulls)
 	log2_nbuckets = my_log2(nbuckets);
 	Assert(nbuckets == (1 << log2_nbuckets));
 
+	/* nbatch must be a power of 2 */
+	log2_nbatch = my_log2(nbatch);
+	Assert(nbatch == (1 << log2_nbatch));
+
 	/*
 	 * Initialize the hash table control block.
 	 *
@@ -271,6 +306,8 @@ ExecHashTableCreate(Hash *node, List *hashOperators, bool keepNulls)
 	 */
 	hashtable = (HashJoinTable) palloc(sizeof(HashJoinTableData));
 	hashtable->nbuckets = nbuckets;
+	hashtable->nbuckets_original = nbuckets;
+	hashtable->nbuckets_optimal = nbuckets;
 	hashtable->log2_nbuckets = log2_nbuckets;
 	hashtable->buckets = NULL;
 	hashtable->keepNulls = keepNulls;
@@ -281,10 +318,12 @@ ExecHashTableCreate(Hash *node, List *hashOperators, bool keepNulls)
 	hashtable->skewBucketNums = NULL;
 	hashtable->nbatch = nbatch;
 	hashtable->curbatch = 0;
+	hashtable->log2_nbatch = log2_nbatch;
 	hashtable->nbatch_original = nbatch;
 	hashtable->nbatch_outstart = nbatch;
 	hashtable->growEnabled = true;
 	hashtable->totalTuples = 0;
+	hashtable->skewTuples = 0;
 	hashtable->innerBatchFile = NULL;
 	hashtable->outerBatchFile = NULL;
 	hashtable->spaceUsed = 0;
@@ -377,7 +416,6 @@ ExecHashTableCreate(Hash *node, List *hashOperators, bool keepNulls)
 	return hashtable;
 }
 
-
 /*
  * Compute appropriate size for hashtable given the estimated size of the
  * relation to be hashed (number of rows and average row width).
@@ -386,7 +424,7 @@ ExecHashTableCreate(Hash *node, List *hashOperators, bool keepNulls)
  */
 
 /* Target bucket loading (tuples per bucket) */
-#define NTUP_PER_BUCKET			10
+#define NTUP_PER_BUCKET			1
 
 void
 ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,
@@ -396,6 +434,7 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,
 {
 	int			tupsize;
 	double		inner_rel_bytes;
+	double		buckets_bytes;
 	long		hash_table_bytes;
 	long		skew_table_bytes;
 	long		max_pointers;
@@ -417,6 +456,9 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,
 		MAXALIGN(tupwidth);
 	inner_rel_bytes = ntuples * tupsize;
 
+	/* Estimate memory needed for buckets, with the target NTUP_PER_BUCKET */
+	buckets_bytes = sizeof(void *) * (1L << my_log2(ntuples / NTUP_PER_BUCKET));
+
 	/*
 	 * Target in-memory hashtable size is work_mem kilobytes.
 	 */
@@ -468,23 +510,48 @@ ExecChooseHashTableSize(double ntuples, int tupwidth, bool useskew,
 	/* also ensure we avoid integer overflow in nbatch and nbuckets */
 	max_pointers = Min(max_pointers, INT_MAX / 2);
 
-	if (inner_rel_bytes > hash_table_bytes)
+	if ((inner_rel_bytes + buckets_bytes) > hash_table_bytes)
 	{
 		/* We'll need multiple batches */
 		long		lbuckets;
 		double		dbatch;
 		int			minbatch;
-
-		lbuckets = (hash_table_bytes / tupsize) / NTUP_PER_BUCKET;
-		lbuckets = Min(lbuckets, max_pointers);
-		nbuckets = (int) lbuckets;
+		double		batch_bytes;
 
 		dbatch = ceil(inner_rel_bytes / hash_table_bytes);
 		dbatch = Min(dbatch, max_pointers);
 		minbatch = (int) dbatch;
 		nbatch = 2;
+
+		/* increase nbatch until the hash table fits into work_mem */
 		while (nbatch < minbatch)
 			nbatch <<= 1;
+
+		/* now check that the bucket array fits into work_mem too */
+		lbuckets = 1L << my_log2(ntuples / nbatch / NTUP_PER_BUCKET);
+
+		/* assuming equally-sized batches, compute bytes of hash table and buckets */
+		batch_bytes = inner_rel_bytes / nbatch;
+		buckets_bytes = lbuckets * sizeof(void *);
+
+		/* we need to fit both into work_mem */
+		while (batch_bytes + buckets_bytes > hash_table_bytes)
+		{
+			/* double the number of batches, and re-evaluate the memory needs */
+			nbatch <<= 1;
+
+			lbuckets = 1L << my_log2(ntuples / nbatch / NTUP_PER_BUCKET);
+
+			batch_bytes = inner_rel_bytes / nbatch;
+			buckets_bytes = lbuckets * sizeof(void *);
+		}
+
+		/* protect against nbuckets overflow */
+		lbuckets = Min(lbuckets, max_pointers);
+		nbuckets = (int) lbuckets;
 	}
 	else
 	{
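For a feel of what the extra loop buys, a worked example with made-up numbers: ntuples = 1,000,000 and tupsize = 64 give inner_rel_bytes of about 64MB; with hash_table_bytes = 16MB the first loop settles on nbatch = 4. The bucket check then computes lbuckets = 2^18 = 262,144 (the next power of two above 1,000,000 / 4), so buckets_bytes is about 2MB on a 64-bit platform while batch_bytes is about 16MB. Together they exceed the 16MB budget, so nbatch doubles to 8, leaving batch_bytes of about 8MB plus buckets_bytes of about 1MB, which fits. The old code ignored the bucket array entirely, would have kept nbatch = 4, and would have overrun work_mem at execution time.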
@@ -561,6 +628,8 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable)
 	MemoryContext oldcxt;
 	long		ninmemory;
 	long		nfreed;
+	double		batchTuples;
+
 	HashChunk	oldchunks = hashtable->chunks_batch;
 
 	/* do nothing if we've decided to shut off growth */
@@ -607,6 +676,7 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable)
 	MemoryContextSwitchTo(oldcxt);
 
 	hashtable->nbatch = nbatch;
+	hashtable->log2_nbatch += 1;
 
 	/*
 	 * Scan through the existing hash table entries and dump out any that are
@@ -700,9 +770,123 @@ ExecHashIncreaseNumBatches(HashJoinTable hashtable)
 		printf("Disabling further increase of nbatch\n");
 #endif
 	}
+
+	/*
+	 * After increasing the number of batches, the number of tuples per batch
+	 * is lower (~1/2), so the optimal number of buckets might be lower too.
+	 * It's probably (nbuckets_optimal/2), but let's recompute it from
+	 * scratch, starting at nbuckets_original - we already have that memory
+	 * allocated, so let's use it.
+	 */
+	hashtable->nbuckets_optimal = hashtable->nbuckets_original;
+	batchTuples = (hashtable->totalTuples - hashtable->skewTuples) / hashtable->nbatch;
+	while ((batchTuples / hashtable->nbuckets_optimal > NTUP_PER_BUCKET) &&
+		   (hashtable->nbuckets_optimal <= (INT_MAX / 2)))
+		hashtable->nbuckets_optimal *= 2;
 }
 
 /*
+ * ExecHashIncreaseNumBuckets
+ *		increase the number of buckets in order to reduce the number of
+ *		tuples per bucket
+ */
+static void
+ExecHashIncreaseNumBuckets(HashJoinTable hashtable)
+{
+	HashChunk	chunk;
+
+	/* do nothing if not an increase (it's called increase for a reason) */
+	if (hashtable->nbuckets >= hashtable->nbuckets_optimal)
+		return;
+
+	/*
+	 * We already know the optimal number of buckets, so let's just compute
+	 * the log2_nbuckets for it.
+	 */
+	hashtable->nbuckets = hashtable->nbuckets_optimal;
+	hashtable->log2_nbuckets = my_log2(hashtable->nbuckets_optimal);
+
+	/*
+	 * XXX Not sure if we should update the info about used space here. The
+	 * code seems to ignore the space used for 'buckets' and we're not
+	 * allocating more space for tuples (just shuffling them into the new
+	 * buckets). And the amount of memory used for buckets is quite small
+	 * (just an array of pointers, thus ~8kB per 1k buckets on 64-bit).
+	 */
+
+	/*
+	 * XXX Should we disable growth if (nbuckets * NTUP_PER_BUCKET) reaches
+	 * work_mem (or something like that)? We shouldn't really get into such
+	 * a position (it should be handled by increasing the number of batches,
+	 * which happens right before this).
+	 */
+
+	/*
+	 * XXX Maybe adding info to the hash join EXPLAIN output (e.g. initial
+	 * nbuckets, time spent growing the table) would be appropriate.
+	 */
+
+	Assert(hashtable->nbuckets > 1);
+	Assert(hashtable->nbuckets <= (INT_MAX / 2));
+	Assert(hashtable->nbuckets == (1 << hashtable->log2_nbuckets));
+
+#ifdef HJDEBUG
+	printf("Increasing nbuckets to %d\n", hashtable->nbuckets);
+#endif
+
+	/*
+	 * Just reallocate the proper number of buckets - we don't need to walk
+	 * the buckets themselves, because we can walk the dense-allocated
+	 * chunks instead (just like in ExecHashIncreaseNumBatches, but without
+	 * all the copying into new chunks).
+	 */
+	hashtable->buckets = (HashJoinTuple *) repalloc(hashtable->buckets,
+								hashtable->nbuckets * sizeof(HashJoinTuple));
+
+	/*
+	 * Reset all the bucket heads - every tuple gets reinserted below, and
+	 * the memory returned by repalloc is not zeroed.
+	 */
+	memset(hashtable->buckets, 0, sizeof(void *) * hashtable->nbuckets);
+
+	/* so, let's scan through the chunks, and all tuples in each chunk */
+	chunk = hashtable->chunks_batch;
+	while (chunk != NULL)
+	{
+		/* position within the buffer (up to chunk->used) */
+		size_t		idx = 0;
+
+		/* process all tuples stored in this chunk */
+		while (idx < chunk->used)
+		{
+			/* get the hashjoin tuple and the mintuple stored right after it */
+			HashJoinTuple hashTuple = (HashJoinTuple) (chunk->data + idx);
+			MinimalTuple tuple = HJTUPLE_MINTUPLE(hashTuple);
+
+			int			hashTupleSize = (HJTUPLE_OVERHEAD + tuple->t_len);
+			int			bucketno;
+			int			batchno;
+
+			ExecHashGetBucketAndBatch(hashtable, hashTuple->hashvalue,
+									  &bucketno, &batchno);
+
+			/* just add the tuple to the proper bucket */
+			hashTuple->next = hashtable->buckets[bucketno];
+			hashtable->buckets[bucketno] = hashTuple;
+
+			/* bytes occupied in memory: HJ tuple overhead + actual tuple length */
+			idx += hashTupleSize;
+		}
+
+		/* proceed to the next chunk */
+		chunk = chunk->next;
+	}
+
+#ifdef HJDEBUG
+	printf("Nbuckets increased to %d, average items per bucket %.1f\n",
+		   hashtable->nbuckets,
+		   (hashtable->totalTuples - hashtable->skewTuples) /
+		   hashtable->nbatch / hashtable->nbuckets);
+#endif
+}
+
+
+/*
  * ExecHashTableInsert
  *		insert a tuple into the hash table depending on the hash value
  *		it may just go to a temp file for later batches
@@ -755,12 +939,26 @@ ExecHashTableInsert(HashJoinTable hashtable,
 		hashTuple->next = hashtable->buckets[bucketno];
 		hashtable->buckets[bucketno] = hashTuple;
 
+		/* Increase the optimal number of buckets if we exceeded NTUP_PER_BUCKET. */
+		if (((hashtable->totalTuples - hashtable->skewTuples) / hashtable->nbatch)
+			>= (hashtable->nbuckets_optimal * NTUP_PER_BUCKET))
+		{
+			/* overflow protection */
+			if (hashtable->nbuckets_optimal <= (INT_MAX / 2))
+				hashtable->nbuckets_optimal *= 2;
+		}
+
 		/* Account for space used, and back off if we've used too much */
 		hashtable->spaceUsed += hashTupleSize;
 		if (hashtable->spaceUsed > hashtable->spacePeak)
 			hashtable->spacePeak = hashtable->spaceUsed;
-		if (hashtable->spaceUsed > hashtable->spaceAllowed)
+
+		/*
+		 * Do we need to increase the number of batches? Account for the
+		 * space required by buckets (the optimal number of them).
+		 */
+		if (hashtable->spaceUsed + BUCKETS_SPACE(hashtable) > hashtable->spaceAllowed)
 			ExecHashIncreaseNumBatches(hashtable);
 	}
 	else
 	{
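Since every non-skew tuple lives in the dense chunks_batch chunks, the rebuild above is one linear pass and never follows the old bucket chains. A cut-down standalone model of that walk, for illustration only - Chunk and Tuple here are simplified stand-ins for the patch's HashChunk and HashJoinTuple, and bucket selection is reduced to a plain mask:

    #include <stddef.h>

    typedef struct Tuple
    {
        struct Tuple *next;			/* bucket chain link */
        unsigned int  hashvalue;
        size_t        len;			/* total size of this entry in the chunk */
        /* tuple payload follows */
    } Tuple;

    typedef struct Chunk
    {
        struct Chunk *next;
        size_t        used;
        char          data[1];		/* dense tuple storage */
    } Chunk;

    /* rebuild bucket heads by walking the dense chunks; buckets[] is
     * assumed already resized and zeroed, as in the patch */
    static void
    rebuild_buckets(Chunk *chunks, Tuple **buckets, int nbuckets)
    {
        Chunk *chunk;

        for (chunk = chunks; chunk != NULL; chunk = chunk->next)
        {
            size_t idx = 0;

            while (idx < chunk->used)
            {
                Tuple *tup = (Tuple *) (chunk->data + idx);
                int    bucketno = tup->hashvalue & (nbuckets - 1);

                /* push the tuple onto its (new) bucket chain */
                tup->next = buckets[bucketno];
                buckets[bucketno] = tup;

                idx += tup->len;
            }
        }
    }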
@@ -877,14 +1075,12 @@ ExecHashGetHashValue(HashJoinTable hashtable,
  * chains), and must only cause the batch number to remain the same or
  * increase.  Our algorithm is
  *		bucketno = hashvalue MOD nbuckets
- *		batchno = (hashvalue DIV nbuckets) MOD nbatch
+ *		batchno = highest log2(nbatch) bits of hashvalue
  * where nbuckets and nbatch are both expected to be powers of 2, so we can
  * do the computations by shifting and masking.  (This assumes that all hash
  * functions are good about randomizing all their output bits, else we are
  * likely to have very skewed bucket or batch occupancy.)
  *
- * nbuckets doesn't change over the course of the join.
- *
  * nbatch is always a power of 2; we increase it only by doubling it.  This
- * effectively adds one more bit to the top of the batchno.
+ * effectively adds one more bit at the bottom of the batchno.
  */
@@ -901,7 +1097,7 @@ ExecHashGetBucketAndBatch(HashJoinTable hashtable,
 	{
 		/* we can do MOD by masking, DIV by shifting */
 		*bucketno = hashvalue & (nbuckets - 1);
-		*batchno = (hashvalue >> hashtable->log2_nbuckets) & (nbatch - 1);
+		*batchno = (hashvalue >> (32 - hashtable->log2_nbatch));
 	}
 	else
 	{
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 6c52db8..9dae612 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -788,6 +788,15 @@ static struct config_bool ConfigureNamesBool[] =
 		NULL, NULL, NULL
 	},
 	{
+		{"enable_hash_resize", PGC_USERSET, QUERY_TUNING_METHOD,
+			gettext_noop("Enables dynamic increase of hash buckets."),
+			NULL
+		},
+		&enable_hash_resize,
+		true,
+		NULL, NULL, NULL
+	},
+	{
 		{"geqo", PGC_USERSET, QUERY_TUNING_GEQO,
 			gettext_noop("Enables genetic query optimization."),
 			gettext_noop("This algorithm attempts to do planning without "
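The consequences of the new batchno computation can be checked with a throwaway program (all values arbitrary): because batchno is taken from the top of the hash value, doubling nbatch appends one low-order bit to batchno, so a tuple's batch number can only stay the same or grow - exactly the invariant the comment above requires.

    #include <stdio.h>
    #include <stdint.h>

    int
    main(void)
    {
        uint32_t hashvalue = 0xDEADBEEF;
        int      log2_nbuckets = 10;	/* nbuckets = 1024 */
        int      log2_nbatch = 2;		/* nbatch = 4 */

        int bucketno = hashvalue & ((1 << log2_nbuckets) - 1);
        int batchno = hashvalue >> (32 - log2_nbatch);			/* top bits */
        int batchno2 = hashvalue >> (32 - (log2_nbatch + 1));	/* after doubling */

        /* prints "bucket 751, batch 3 -> 6": the new batchno is
         * 2*batchno or 2*batchno + 1, never less than batchno */
        printf("bucket %d, batch %d -> %d\n", bucketno, batchno, batchno2);
        return 0;
    }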
diff --git a/src/include/executor/hashjoin.h b/src/include/executor/hashjoin.h
index c80a4d1..9d03de2 100644
--- a/src/include/executor/hashjoin.h
+++ b/src/include/executor/hashjoin.h
@@ -122,8 +122,11 @@ typedef struct HashChunkData* HashChunk;
 typedef struct HashJoinTableData
 {
 	int			nbuckets;		/* # buckets in the in-memory hash table */
+	int			nbuckets_original;	/* # buckets when starting the first hash */
 	int			log2_nbuckets;	/* its log2 (nbuckets must be a power of 2) */
 
+	int			nbuckets_optimal;	/* optimal # of buckets (per batch) */
+
 	/* buckets[i] is head of list of tuples in i'th in-memory bucket */
 	struct HashJoinTupleData **buckets;
 	/* buckets array is per-batch storage, as are all the tuples */
@@ -138,6 +141,7 @@ typedef struct HashJoinTableData
 
 	int			nbatch;			/* number of batches */
 	int			curbatch;		/* current batch #; 0 during 1st pass */
+	int			log2_nbatch;	/* its log2 (nbatch must be a power of 2) */
 
 	int			nbatch_original;	/* nbatch when we started inner scan */
 	int			nbatch_outstart;	/* nbatch when we started outer scan */
@@ -145,6 +149,7 @@ typedef struct HashJoinTableData
 	bool		growEnabled;	/* flag to shut off nbatch increases */
 
 	double		totalTuples;	/* # tuples obtained from inner plan */
+	double		skewTuples;		/* # tuples inserted into skew buckets */
 
 	/*
 	 * These arrays are allocated for the life of the hash join, but only if
diff --git a/src/include/optimizer/cost.h b/src/include/optimizer/cost.h
index 75e2afb..e7c5f50 100644
--- a/src/include/optimizer/cost.h
+++ b/src/include/optimizer/cost.h
@@ -62,6 +62,7 @@ extern bool enable_material;
 extern bool enable_mergejoin;
 extern bool enable_hashjoin;
 extern int	constraint_exclusion;
+extern bool enable_hash_resize;
 
 extern double clamp_row_est(double nrows);
 extern double index_pages_fetched(double tuples_fetched, BlockNumber pages,
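Since enable_hash_resize is registered as an ordinary PGC_USERSET boolean next to the other enable_* switches, it should be possible to toggle it per session for before/after comparisons, e.g. SET enable_hash_resize = off to keep the planner-chosen bucket count for the whole join.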