diff --git a/src/backend/access/heap/hio.c b/src/backend/access/heap/hio.c
index 8140418..d3608c6 100644
--- a/src/backend/access/heap/hio.c
+++ b/src/backend/access/heap/hio.c
@@ -169,6 +169,55 @@ GetVisibilityMapPins(Relation relation, Buffer buffer1, Buffer buffer2,
 }
 
 /*
+ * Extend a relation by multiple blocks to avoid future contention on the
+ * relation extension lock.  Our goal is to pre-extend the relation by an
+ * amount which ramps up as the degree of contention ramps up, but limiting
+ * the result to some sane overall value.
+ */
+static void
+RelationAddExtraBlocks(Relation relation, BulkInsertState bistate)
+{
+	Page		page;
+	Size		freespace;
+	BlockNumber	blockNum;
+	int			extraBlocks = 0;
+	int			lockWaiters = 0;
+	Buffer		buffer;
+
+	/*
+	 * We use the length of the lock wait queue to judge how much to extend.
+	 * It might seem like multiplying the number of lock waiters by as much
+	 * as 20 is too aggressive, but benchmarking revealed that smaller numbers
+	 * were insufficient.  512 is just an arbitrary cap to prevent pathological
+	 * results (and excessive wasted disk space).
+	 */
+	lockWaiters = RelationExtensionLockWaiterCount(relation);
+	extraBlocks = Min(512, lockWaiters * 20);
+
+	while (extraBlocks-- > 0)
+	{
+		/* Ouch - an unnecessary lseek() each time through the loop! */
+		buffer = ReadBufferBI(relation, P_NEW, bistate);
+
+		/* Extend by one page. */
+		LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
+		page = BufferGetPage(buffer);
+		PageInit(page, BufferGetPageSize(buffer), 0);
+		freespace = PageGetHeapFreeSpace(page);
+		MarkBufferDirty(buffer);
+		blockNum = BufferGetBlockNumber(buffer);
+		UnlockReleaseBuffer(buffer);
+
+		/*
+		 * Put the page in the freespace map so other backends can find it.
+		 * This is what will keep those other backends from also queueing up
+		 * on the relation extension lock.
+		 */
+		RecordPageWithFreeSpace(relation, blockNum, freespace);
+	}
+}
+
+/*
  * RelationGetBufferForTuple
  *
  *	Returns pinned and exclusive-locked buffer of a page in given relation
@@ -233,10 +282,11 @@ RelationGetBufferForTuple(Relation relation, Size len,
 	bool		use_fsm = !(options & HEAP_INSERT_SKIP_FSM);
 	Buffer		buffer = InvalidBuffer;
 	Page		page;
-	Size		pageFreeSpace,
-				saveFreeSpace;
+	Size		pageFreeSpace = 0,
+				saveFreeSpace = 0;
 	BlockNumber targetBlock,
-				otherBlock;
+				otherBlock,
+				lastValidBlock = InvalidBlockNumber;
 	bool		needLock;
 
 	len = MAXALIGN(len);		/* be conservative */
@@ -308,6 +358,7 @@ RelationGetBufferForTuple(Relation relation, Size len,
 		}
 	}
 
+loop:
 	while (targetBlock != InvalidBlockNumber)
 	{
 		/*
@@ -388,6 +439,8 @@ RelationGetBufferForTuple(Relation relation, Size len,
 								 otherBlock, targetBlock, vmbuffer_other,
 								 vmbuffer);
 
+		lastValidBlock = targetBlock;
+
 		/*
 		 * Now we can check to see if there's enough free space here. If so,
 		 * we're done.
@@ -440,10 +493,57 @@ RelationGetBufferForTuple(Relation relation, Size len,
 	 */
 	needLock = !RELATION_IS_LOCAL(relation);
 
+	/*
+	 * If we need the lock but are not able to acquire it immediately, we'll
+	 * consider extending the relation by multiple blocks at a time to manage
+	 * contention on the relation extension lock.  However, this only makes
+	 * sense if we're using the FSM; otherwise, there's no point.
+	 */
 	if (needLock)
-		LockRelationForExtension(relation, ExclusiveLock);
+	{
+		if (!use_fsm)
+			LockRelationForExtension(relation, ExclusiveLock);
+		else if (!ConditionLockRelationForExtension(relation, ExclusiveLock))
+		{
+			/* Couldn't get the lock immediately; wait for it. */
+			LockRelationForExtension(relation, ExclusiveLock);
+
+			if (lastValidBlock != InvalidBlockNumber)
+			{
+				/*
+				 * Call GetPageWithFreeSpaceExtended rather than
+				 * GetPageWithFreeSpace: a backend that held the lock before
+				 * us may have added extra blocks to the FSM, and it's
+				 * possible that their free space has not yet been
+				 * propagated up to the root node (that happens during
+				 * vacuum).  So start the search directly at the FSM page
+				 * covering the last block we examined, instead of at the
+				 * root.
+				 */
+				targetBlock = GetPageWithFreeSpaceExtended(relation,
+														lastValidBlock,
+														len + saveFreeSpace);
+			}
+
+			/*
+			 * If some other waiter has already extended the relation, we
+			 * don't need to do so; just use the existing freespace.
+			 */
+			if (targetBlock != InvalidBlockNumber)
+			{
+				UnlockRelationForExtension(relation, ExclusiveLock);
+				goto loop;
+			}
+
+			/* Time to bulk-extend. */
+			RelationAddExtraBlocks(relation, bistate);
+		}
+	}
 
 	/*
+	 * In addition to whatever extension we performed above, we always add
+	 * at least one block to satisfy our own request.
+	 *
 	 * XXX This does an lseek - rather expensive - but at the moment it is the
 	 * only way to accurately determine how many blocks are in a relation.  Is
 	 * it worth keeping an accurate file length in shared memory someplace,
diff --git a/src/backend/storage/freespace/freespace.c b/src/backend/storage/freespace/freespace.c
index 2631080..980651e 100644
--- a/src/backend/storage/freespace/freespace.c
+++ b/src/backend/storage/freespace/freespace.c
@@ -109,6 +109,7 @@ static int fsm_set_and_search(Relation rel, FSMAddress addr, uint16 slot,
 				   uint8 newValue, uint8 minValue);
 static BlockNumber fsm_search(Relation rel, uint8 min_cat);
 static uint8 fsm_vacuum_page(Relation rel, FSMAddress addr, bool *eof);
+static int fsm_search_from_addr(Relation rel, FSMAddress addr, uint8 minValue);
 
 
 /******** Public API ********/
@@ -129,9 +130,46 @@ static uint8 fsm_vacuum_page(Relation rel, FSMAddress addr, bool *eof);
 BlockNumber
 GetPageWithFreeSpace(Relation rel, Size spaceNeeded)
 {
-	uint8		min_cat = fsm_space_needed_to_cat(spaceNeeded);
+	/*
+	 * Call GetPageWithFreeSpaceExtended with InvalidBlockNumber so that
+	 * it will search the FSM tree from the root.
+	 */
+	return GetPageWithFreeSpaceExtended(rel, InvalidBlockNumber, spaceNeeded);
+}
+
+/*
+ * GetPageWithFreeSpaceExtended
+ *
+ * As above, but first search the FSM page covering oldPage rather than
+ * starting from the root, so that we can find a suitable page even when
+ * free blocks were added to the FSM but not yet propagated up to the root.
+ * If oldPage is InvalidBlockNumber, or nothing is found, search from the root.
+ */
+BlockNumber
+GetPageWithFreeSpaceExtended(Relation rel, BlockNumber oldPage,
+							 Size spaceNeeded)
+{
+	uint8		search_cat = fsm_space_needed_to_cat(spaceNeeded);
+	FSMAddress	addr;
+	uint16		slot;
+	int			search_slot = -1;
+
+	if (oldPage != InvalidBlockNumber)
+	{
+		/* Get the location of the FSM byte representing the heap block */
+		addr = fsm_get_location(oldPage, &slot);
+
+		search_slot = fsm_search_from_addr(rel, addr, search_cat);
+	}
 
-	return fsm_search(rel, min_cat);
+	/*
+	 * If fsm_search_from_addr found a suitable new block, return that.
+	 * Otherwise, search as usual.
+	 */
+	if (search_slot != -1)
+		return fsm_get_heap_blk(addr, search_slot);
+	else
+		return fsm_search(rel, search_cat);
 }
 
 /*
@@ -634,6 +672,34 @@ fsm_set_and_search(Relation rel, FSMAddress addr, uint16 slot,
 }
 
 /*
+ * Search the FSM page at the given address for a slot satisfying minValue.
+ * Only that one page is examined; this is used to find free pages in cases
+ * where vacuum has not yet propagated the free space up to the root of the
+ * FSM tree.
+ * If one is found, its slot number is returned, -1 otherwise.
+ */
+static int
+fsm_search_from_addr(Relation rel, FSMAddress addr, uint8 minValue)
+{
+	Buffer		buf;
+	int			newslot = -1;
+
+	buf = fsm_readbuf(rel, addr, true);
+	LockBuffer(buf, BUFFER_LOCK_SHARE);
+
+	Assert(minValue != 0);
+
+	/* Search while we still hold the lock */
+	newslot = fsm_search_avail(buf, minValue,
+							   addr.level == FSM_BOTTOM_LEVEL,
+							   false);
+
+	UnlockReleaseBuffer(buf);
+
+	return newslot;
+}
+
+/*
  * Search the tree for a heap page with at least min_cat of free space
  */
 static BlockNumber
diff --git a/src/backend/storage/lmgr/lmgr.c b/src/backend/storage/lmgr/lmgr.c
index 0632fc0..7e04137 100644
--- a/src/backend/storage/lmgr/lmgr.c
+++ b/src/backend/storage/lmgr/lmgr.c
@@ -341,6 +341,41 @@ LockRelationForExtension(Relation relation, LOCKMODE lockmode)
 }
 
 /*
+ *		ConditionLockRelationForExtension
+ *
+ * As above, but only lock if we can get the lock without blocking.
+ * Returns TRUE iff the lock was acquired.
+ */
+bool
+ConditionLockRelationForExtension(Relation relation, LOCKMODE lockmode)
+{
+	LOCKTAG		tag;
+
+	SET_LOCKTAG_RELATION_EXTEND(tag,
+								relation->rd_lockInfo.lockRelId.dbId,
+								relation->rd_lockInfo.lockRelId.relId);
+
+	return (LockAcquire(&tag, lockmode, false, true) != LOCKACQUIRE_NOT_AVAIL);
+}
+
+/*
+ *		RelationExtensionLockWaiterCount
+ *
+ * Count the number of requesters of the relation extension lock.
+ */
+int
+RelationExtensionLockWaiterCount(Relation relation)
+{
+	LOCKTAG		tag;
+
+	SET_LOCKTAG_RELATION_EXTEND(tag,
+								relation->rd_lockInfo.lockRelId.dbId,
+								relation->rd_lockInfo.lockRelId.relId);
+
+	return LockWaiterCount(&tag);
+}
+
+/*
  *		UnlockRelationForExtension
  */
 void
diff --git a/src/backend/storage/lmgr/lock.c b/src/backend/storage/lmgr/lock.c
index b30b7b1..353f705 100644
--- a/src/backend/storage/lmgr/lock.c
+++ b/src/backend/storage/lmgr/lock.c
@@ -4380,3 +4380,40 @@ VirtualXactLock(VirtualTransactionId vxid, bool wait)
 	LockRelease(&tag, ShareLock, false);
 	return true;
 }
+
+/*
+ * LockWaiterCount
+ *
+ * Find the number of lock requesters on this locktag.
+ */
+int
+LockWaiterCount(const LOCKTAG *locktag)
+{
+	LOCKMETHODID lockmethodid = locktag->locktag_lockmethodid;
+	LOCK	   *lock;
+	bool		found;
+	uint32		hashcode;
+	LWLock	   *partitionLock;
+	int			waiters = 0;
+
+	if (lockmethodid <= 0 || lockmethodid >= lengthof(LockMethods))
+		elog(ERROR, "unrecognized lock method: %d", lockmethodid);
+
+	hashcode = LockTagHashCode(locktag);
+	partitionLock = LockHashPartitionLock(hashcode);
+	LWLockAcquire(partitionLock, LW_EXCLUSIVE);
+
+	lock = (LOCK *) hash_search_with_hash_value(LockMethodLockHash,
+												(const void *) locktag,
+												hashcode,
+												HASH_FIND,
+												&found);
+	if (found)
+	{
+		Assert(lock != NULL);
+		waiters = lock->nRequested;
+	}
+	LWLockRelease(partitionLock);
+
+	return waiters;
+}
diff --git a/src/include/storage/freespace.h b/src/include/storage/freespace.h
index 19dcb8d..27cb971 100644
--- a/src/include/storage/freespace.h
+++ b/src/include/storage/freespace.h
@@ -32,5 +32,9 @@ extern void XLogRecordPageWithFreeSpace(RelFileNode rnode, BlockNumber heapBlk,
 
 extern void FreeSpaceMapTruncateRel(Relation rel, BlockNumber nblocks);
 extern void FreeSpaceMapVacuum(Relation rel);
 
+extern BlockNumber GetPageWithFreeSpaceExtended(Relation rel,
+							BlockNumber oldPage,
+							Size spaceNeeded);
+
 #endif   /* FREESPACE_H_ */
diff --git a/src/include/storage/lmgr.h b/src/include/storage/lmgr.h
index 975b6f8..4460756 100644
--- a/src/include/storage/lmgr.h
+++ b/src/include/storage/lmgr.h
@@ -53,6 +53,9 @@ extern void UnlockRelationIdForSession(LockRelId *relid, LOCKMODE lockmode);
 /* Lock a relation for extension */
 extern void LockRelationForExtension(Relation relation, LOCKMODE lockmode);
 extern void UnlockRelationForExtension(Relation relation, LOCKMODE lockmode);
+extern bool ConditionLockRelationForExtension(Relation relation,
+								  LOCKMODE lockmode);
+extern int	RelationExtensionLockWaiterCount(Relation relation);
 
 /* Lock a page (currently only used within indexes) */
 extern void LockPage(Relation relation, BlockNumber blkno, LOCKMODE lockmode);
diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h
index b26427d..9c08679 100644
--- a/src/include/storage/lock.h
+++ b/src/include/storage/lock.h
@@ -574,6 +574,8 @@ extern void RememberSimpleDeadLock(PGPROC *proc1,
 					   PGPROC *proc2);
 extern void InitDeadLockChecking(void);
 
+extern int	LockWaiterCount(const LOCKTAG *locktag);
+
 #ifdef LOCK_DEBUG
 extern void DumpLocks(PGPROC *proc);
 extern void DumpAllLocks(void);