diff --git a/src/backend/storage/ipc/shmem.c b/src/backend/storage/ipc/shmem.c
index 78f15f0..91fcc05 100644
--- a/src/backend/storage/ipc/shmem.c
+++ b/src/backend/storage/ipc/shmem.c
@@ -265,7 +265,7 @@ InitShmemIndex(void)
*/
HTAB *
ShmemInitHash(const char *name, /* table string name for shmem index */
- long init_size, /* initial table size */
+			  long init_size,	/* initial table size (now unused, see below) */
long max_size, /* max size of the table */
HASHCTL *infoP, /* info about key and bucket size */
int hash_flags) /* info about infoP */
@@ -299,7 +299,7 @@ ShmemInitHash(const char *name, /* table string name for shmem index */
/* Pass location of hashtable header to hash_create */
infoP->hctl = (HASHHDR *) location;
- return hash_create(name, init_size, infoP, hash_flags);
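+	/*
+	 * The freelist is now split across partitions and all elements of a
+	 * shared table are preallocated up front, so init_size is effectively
+	 * ignored here.
+	 */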
+ return hash_create(name, max_size, infoP, hash_flags);
}
/*
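For context, this is (approximately) how buf_table.c builds the partitioned table that reaches ShmemInitHash. It already passes the same value for both sizes, so the change above is behavior-neutral for it; callers such as lock.c, which pass an init_size of half the max, now get their tables preallocated at full size:

```c
/* Approximate caller, from src/backend/storage/buffer/buf_table.c */
HASHCTL		info;

info.keysize = sizeof(BufferTag);
info.entrysize = sizeof(BufferLookupEnt);
info.num_partitions = NUM_BUFFER_PARTITIONS;

SharedBufHash = ShmemInitHash("Shared Buffer Lookup Table",
							  size, size,	/* init_size == max_size here */
							  &info,
							  HASH_ELEM | HASH_BLOBS | HASH_PARTITION);
```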
diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c
index d43fb61..9d66359 100644
--- a/src/backend/storage/lmgr/lwlock.c
+++ b/src/backend/storage/lmgr/lwlock.c
@@ -359,6 +359,16 @@ NumLWLocks(void)
/* slot.c needs one for each slot */
numLocks += max_replication_slots;
+	/*
+	 * No extra LWLocks are needed for the partitioned shared hash tables:
+	 * the freelists of buf_table.c's "Shared Buffer Lookup Table", lock.c's
+	 * "LOCK hash" and "PROCLOCK hash", and predicate.c's
+	 * "PREDICATELOCKTARGET hash" and "PREDICATELOCK hash" are now protected
+	 * by per-partition spinlocks inside dynahash.c.
+	 */
+
/*
* Add any requested by loadable modules; for backwards-compatibility
* reasons, allocate at least NUM_USER_DEFINED_LWLOCKS of them even if
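The per-partition spinlocks that make the extra LWLocks unnecessary follow the usual storage/spin.h pattern. A minimal illustrative sketch (the function names are mine, not patch code):

```c
#include "postgres.h"
#include "storage/lwlock.h"		/* NUM_LOCK_PARTITIONS */
#include "storage/spin.h"

static slock_t mutexes[NUM_LOCK_PARTITIONS];
static long counters[NUM_LOCK_PARTITIONS];

static void
sketch_init(void)
{
	int			i;

	for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
		SpinLockInit(&mutexes[i]);
}

static void
sketch_bump(uint32 hashvalue)
{
	int			idx = hashvalue % NUM_LOCK_PARTITIONS;

	/* spinlock critical sections must stay short and must not elog() */
	SpinLockAcquire(&mutexes[idx]);
	counters[idx]++;
	SpinLockRelease(&mutexes[idx]);
}
```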
diff --git a/src/backend/utils/hash/dynahash.c b/src/backend/utils/hash/dynahash.c
index eacffc4..bc4fa4a 100644
--- a/src/backend/utils/hash/dynahash.c
+++ b/src/backend/utils/hash/dynahash.c
@@ -87,6 +87,8 @@
#include "access/xact.h"
#include "storage/shmem.h"
#include "storage/spin.h"
+#include "storage/lock.h"
+#include "storage/lwlock.h"
#include "utils/dynahash.h"
#include "utils/memutils.h"
@@ -129,11 +131,17 @@ typedef HASHBUCKET *HASHSEGMENT;
struct HASHHDR
{
-	/* In a partitioned table, take this lock to touch nentries or freeList */
-	slock_t		mutex;			/* unused if not partitioned table */
+	/*
+	 * In a partitioned table, take a partition's mutex to touch its
+	 * nentries or freeList slot.  Unused if the table isn't partitioned.
+	 */
+	slock_t		mutex[NUM_LOCK_PARTITIONS];
/* These fields change during entry addition/deletion */
- long nentries; /* number of entries in hash table */
- HASHELEMENT *freeList; /* linked list of free elements */
+	long		nentries[NUM_LOCK_PARTITIONS]; /* number of entries per partition */
+	HASHELEMENT *freeList[NUM_LOCK_PARTITIONS]; /* free elements, one list per partition */
/* These fields can change, but not in a partitioned table */
/* Also, dsize can't change in a shared table, even if unpartitioned */
@@ -166,6 +174,9 @@ struct HASHHDR
#define IS_PARTITIONED(hctl) ((hctl)->num_partitions != 0)
+/*
+ * Map a hash value to the index of the mutex/nentries/freeList slot it
+ * belongs to.  A table that isn't partitioned keeps everything in slot 0.
+ */
+#define PARTITION_IDX(hctl, hashcode) \
+	(IS_PARTITIONED(hctl) ? LockHashPartition(hashcode) : 0)
+
/*
* Top control structure for a hashtable --- in a shared table, each backend
* has its own copy (OK since no fields change at runtime)
@@ -219,10 +230,10 @@ static long hash_accesses,
*/
static void *DynaHashAlloc(Size size);
static HASHSEGMENT seg_alloc(HTAB *hashp);
-static bool element_alloc(HTAB *hashp, int nelem);
+static bool element_alloc(HTAB *hashp, int nelem, int partition_idx);
static bool dir_realloc(HTAB *hashp);
static bool expand_table(HTAB *hashp);
-static HASHBUCKET get_hash_entry(HTAB *hashp);
+static HASHBUCKET get_hash_entry(HTAB *hashp, int partition_idx);
static void hdefault(HTAB *hashp);
static int choose_nelem_alloc(Size entrysize);
static bool init_htab(HTAB *hashp, long nelem);
@@ -282,6 +293,7 @@ hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
{
HTAB *hashp;
HASHHDR *hctl;
+	int			i,
+				partitions_number,
+				nelem_alloc;
/*
* For shared hash tables, we have a local hash header (HTAB struct) that
@@ -482,10 +494,20 @@ hash_create(const char *tabname, long nelem, HASHCTL *info, int flags)
if ((flags & HASH_SHARED_MEM) ||
nelem < hctl->nelem_alloc)
{
- if (!element_alloc(hashp, (int) nelem))
- ereport(ERROR,
- (errcode(ERRCODE_OUT_OF_MEMORY),
- errmsg("out of memory")));
+		if (IS_PARTITIONED(hashp->hctl))
+			partitions_number = NUM_LOCK_PARTITIONS;
+		else
+			partitions_number = 1;
+
+		/* distribute the initial elements evenly among the partitions */
+		nelem_alloc = ((int) nelem) / partitions_number;
+		if (nelem_alloc == 0)
+			nelem_alloc = 1;
+
+		for (i = 0; i < partitions_number; i++)
+			if (!element_alloc(hashp, nelem_alloc, i))
+				ereport(ERROR,
+						(errcode(ERRCODE_OUT_OF_MEMORY),
+						 errmsg("out of memory")));
}
if (flags & HASH_FIXED_SIZE)
@@ -503,8 +525,11 @@ hdefault(HTAB *hashp)
MemSet(hctl, 0, sizeof(HASHHDR));
-	hctl->nentries = 0;
-	hctl->freeList = NULL;
+	/* nentries[] and freeList[] are already zeroed by the MemSet above */
hctl->dsize = DEF_DIRSIZE;
hctl->nsegs = 0;
@@ -572,12 +597,19 @@ init_htab(HTAB *hashp, long nelem)
HASHSEGMENT *segp;
int nbuckets;
int nsegs;
-
+	int			i;
+
 	/*
-	 * initialize mutex if it's a partitioned table
+	 * initialize mutexes if it's a partitioned table
 	 */
 	if (IS_PARTITIONED(hctl))
-		SpinLockInit(&hctl->mutex);
+	{
+		for (i = 0; i < NUM_LOCK_PARTITIONS; i++)
+			SpinLockInit(&hctl->mutex[i]);
+	}
/*
* Divide number of elements by the fill factor to determine a desired
@@ -648,7 +680,8 @@ init_htab(HTAB *hashp, long nelem)
"HIGH MASK ", hctl->high_mask,
"LOW MASK ", hctl->low_mask,
"NSEGS ", hctl->nsegs,
- "NENTRIES ", hctl->nentries);
+ // AALEKSEEV: fix this
+ "NENTRIES ", hctl->nentries[0]);
#endif
return true;
}
@@ -769,7 +802,8 @@ hash_stats(const char *where, HTAB *hashp)
where, hashp->hctl->accesses, hashp->hctl->collisions);
fprintf(stderr, "hash_stats: entries %ld keysize %ld maxp %u segmentcount %ld\n",
-			hashp->hctl->nentries, (long) hashp->hctl->keysize,
+			hash_get_num_entries(hashp), (long) hashp->hctl->keysize,
hashp->hctl->max_bucket, hashp->hctl->nsegs);
fprintf(stderr, "%s: total accesses %ld total collisions %ld\n",
where, hash_accesses, hash_collisions);
@@ -863,6 +897,7 @@ hash_search_with_hash_value(HTAB *hashp,
HASHBUCKET currBucket;
HASHBUCKET *prevBucketPtr;
HashCompareFunc match;
+ int partition_idx = PARTITION_IDX(hctl, hashvalue);
#if HASH_STATISTICS
hash_accesses++;
@@ -885,7 +920,7 @@ hash_search_with_hash_value(HTAB *hashp,
* order of these tests is to try to check cheaper conditions first.
*/
if (!IS_PARTITIONED(hctl) && !hashp->frozen &&
- hctl->nentries / (long) (hctl->max_bucket + 1) >= hctl->ffactor &&
+ hctl->nentries[0] / (long) (hctl->max_bucket + 1) >= hctl->ffactor &&
!has_seq_scans(hashp))
(void) expand_table(hashp);
}
@@ -942,21 +977,25 @@ hash_search_with_hash_value(HTAB *hashp,
if (currBucket != NULL)
{
/* if partitioned, must lock to touch nentries and freeList */
 				if (IS_PARTITIONED(hctl))
-					SpinLockAcquire(&hctl->mutex);
+					SpinLockAcquire(&hctl->mutex[partition_idx]);
- Assert(hctl->nentries > 0);
- hctl->nentries--;
+ Assert(hctl->nentries[partition_idx] > 0);
+ hctl->nentries[partition_idx]--;
/* remove record from hash bucket's chain. */
*prevBucketPtr = currBucket->link;
/* add the record to the freelist for this table. */
- currBucket->link = hctl->freeList;
- hctl->freeList = currBucket;
+ currBucket->link = hctl->freeList[partition_idx];
+ hctl->freeList[partition_idx] = currBucket;
 				if (IS_PARTITIONED(hctl))
-					SpinLockRelease(&hctl->mutex);
+					SpinLockRelease(&hctl->mutex[partition_idx]);
/*
* better hope the caller is synchronizing access to this
@@ -982,7 +1021,7 @@ hash_search_with_hash_value(HTAB *hashp,
elog(ERROR, "cannot insert into frozen hashtable \"%s\"",
hashp->tabname);
- currBucket = get_hash_entry(hashp);
+ currBucket = get_hash_entry(hashp, partition_idx);
if (currBucket == NULL)
{
/* out of memory */
@@ -1175,41 +1214,81 @@ hash_update_hash_key(HTAB *hashp,
* create a new entry if possible
*/
static HASHBUCKET
-get_hash_entry(HTAB *hashp)
+get_hash_entry(HTAB *hashp, int partition_idx)
{
HASHHDR *hctl = hashp->hctl;
HASHBUCKET newElement;
+	int			steal_from_idx;
 	for (;;)
 	{
 		/* if partitioned, must lock to touch nentries and freeList */
 		if (IS_PARTITIONED(hctl))
-			SpinLockAcquire(&hctl->mutex);
+			SpinLockAcquire(&hctl->mutex[partition_idx]);
 		/* try to get an entry from the freelist */
-		newElement = hctl->freeList;
+		newElement = hctl->freeList[partition_idx];
+
 		if (newElement != NULL)
-			break;
+		{
+			/* remove entry from freelist, bump nentries */
+			hctl->freeList[partition_idx] = newElement->link;
+			hctl->nentries[partition_idx]++;
+			if (IS_PARTITIONED(hctl))
+				SpinLockRelease(&hctl->mutex[partition_idx]);
+
+			return newElement;
+		}
-		/* no free elements. allocate another chunk of buckets */
 		if (IS_PARTITIONED(hctl))
-			SpinLockRelease(&hctl->mutex);
+			SpinLockRelease(&hctl->mutex[partition_idx]);
-		if (!element_alloc(hashp, hctl->nelem_alloc))
+		/* no free elements. allocate another chunk of buckets */
+		if (!element_alloc(hashp, hctl->nelem_alloc, partition_idx))
 		{
-			/* out of memory */
-			return NULL;
-		}
-	}
+			if (!IS_PARTITIONED(hctl))
+				return NULL;	/* out of memory */
-	/* remove entry from freelist, bump nentries */
-	hctl->freeList = newElement->link;
-	hctl->nentries++;
+			/*
+			 * This partition's freelist is empty and we are out of memory;
+			 * try to "steal" a free element from the other partitions,
+			 * scanning them in round-robin order.
+			 */
+			steal_from_idx = partition_idx;
+			for (;;)
+			{
+				steal_from_idx = (steal_from_idx + 1) % NUM_LOCK_PARTITIONS;
+				if (steal_from_idx == partition_idx)
+					break;		/* all freelists are empty, give up */
+
+				SpinLockAcquire(&hctl->mutex[steal_from_idx]);
+				newElement = hctl->freeList[steal_from_idx];
+
+				if (newElement != NULL)
+				{
+					hctl->freeList[steal_from_idx] = newElement->link;
+					SpinLockRelease(&hctl->mutex[steal_from_idx]);
+
+					/* the stolen element is accounted to our partition */
+					SpinLockAcquire(&hctl->mutex[partition_idx]);
+					hctl->nentries[partition_idx]++;
+					SpinLockRelease(&hctl->mutex[partition_idx]);
+
+					break;
+				}
-	if (IS_PARTITIONED(hctl))
-		SpinLockRelease(&hctl->mutex);
+				SpinLockRelease(&hctl->mutex[steal_from_idx]);
+			}
+
-	return newElement;
+			return newElement;
+		}
+	}
}
/*
@@ -1218,11 +1297,21 @@ get_hash_entry(HTAB *hashp)
long
hash_get_num_entries(HTAB *hashp)
{
+ int i;
+ long sum = hashp->hctl->nentries[0];
+
/*
* We currently don't bother with the mutex; it's only sensible to call
* this function if you've got lock on all partitions of the table.
*/
-	return hashp->hctl->nentries;
+	if (!IS_PARTITIONED(hashp->hctl))
+		return sum;
+
+	for (i = 1; i < NUM_LOCK_PARTITIONS; i++)
+		sum += hashp->hctl->nentries[i];
+
+	return sum;
}
/*
@@ -1530,7 +1619,7 @@ seg_alloc(HTAB *hashp)
* allocate some new elements and link them into the free list
*/
static bool
-element_alloc(HTAB *hashp, int nelem)
+element_alloc(HTAB *hashp, int nelem, int partition_idx)
{
HASHHDR *hctl = hashp->hctl;
Size elementSize;
@@ -1562,15 +1651,23 @@ element_alloc(HTAB *hashp, int nelem)
}
/* if partitioned, must lock to touch freeList */
 	if (IS_PARTITIONED(hctl))
-		SpinLockAcquire(&hctl->mutex);
+		SpinLockAcquire(&hctl->mutex[partition_idx]);
 	/* freelist could be nonempty if two backends did this concurrently */
-	firstElement->link = hctl->freeList;
-	hctl->freeList = prevElement;
+	firstElement->link = hctl->freeList[partition_idx];
+	hctl->freeList[partition_idx] = prevElement;
 	if (IS_PARTITIONED(hctl))
-		SpinLockRelease(&hctl->mutex);
+		SpinLockRelease(&hctl->mutex[partition_idx]);
return true;
}
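Why indexing the freelists with LockHashPartition() works out: callers of a partitioned table already serialize on the matching partition LWLock, so backends operating in different partitions now touch different freelists as well. A rough sketch of the caller side, using 9.5-era lock.c identifiers (simplified, not patch code):

```c
uint32		hashcode = LockTagHashCode(&tag);
LWLock	   *partitionLock = LockHashPartitionLock(hashcode);
bool		found;
LOCK	   *lock;

LWLockAcquire(partitionLock, LW_EXCLUSIVE);

/*
 * dynahash picks freelist slot LockHashPartition(hashcode) internally,
 * so two backends holding different partition locks no longer contend
 * on a single freelist spinlock.
 */
lock = (LOCK *) hash_search_with_hash_value(LockMethodLockHash,
											(void *) &tag,
											hashcode,
											HASH_ENTER_NULL,
											&found);

LWLockRelease(partitionLock);
```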
diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h
index ff34529..767f20b 100644
--- a/src/include/storage/lwlock.h
+++ b/src/include/storage/lwlock.h
@@ -128,13 +128,14 @@ extern char *MainLWLockNames[];
* having this file include lock.h or bufmgr.h would be backwards.
*/
-/* Number of partitions of the shared buffer mapping hashtable */
-#define NUM_BUFFER_PARTITIONS 128
-
/* Number of partitions the shared lock tables are divided into */
#define LOG2_NUM_LOCK_PARTITIONS 4
#define NUM_LOCK_PARTITIONS (1 << LOG2_NUM_LOCK_PARTITIONS)
+/*
+ * Number of partitions of the shared buffer mapping hashtable.  This is
+ * now tied to NUM_LOCK_PARTITIONS because the per-partition freelists in
+ * dynahash.c are sized and indexed with NUM_LOCK_PARTITIONS.
+ */
+#define NUM_BUFFER_PARTITIONS NUM_LOCK_PARTITIONS
+
/* Number of partitions the shared predicate lock tables are divided into */
#define LOG2_NUM_PREDICATELOCK_PARTITIONS 4
#define NUM_PREDICATELOCK_PARTITIONS (1 << LOG2_NUM_PREDICATELOCK_PARTITIONS)
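Note that redefining NUM_BUFFER_PARTITIONS this way silently shrinks the buffer mapping table from 128 partitions to 16 and couples two previously independent constants. A hypothetical compile-time guard (not part of this patch; StaticAssertStmt comes from c.h and must sit in statement context, e.g. at the top of InitBufTable()) would at least document the constraint the dynahash freelists impose:

```c
/* hypothetical guard, e.g. at the top of InitBufTable() */
StaticAssertStmt(NUM_BUFFER_PARTITIONS <= NUM_LOCK_PARTITIONS,
				 "dynahash freelists are sized with NUM_LOCK_PARTITIONS");
```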