/*
 * Patch overview: add the sync_on_checkpoint_flush setting, which lets the
 * checkpoint write path request writeback of each block right after it has
 * been written.
 *
 * What the hunks below do, file by file:
 *
 *	bufmgr.c / bufmgr.h
 *		Define and export the global bool sync_on_checkpoint_flush
 *		(initialised to false).  SyncOneBuffer() and FlushBuffer() each gain
 *		a trailing "bool flush_to_disk" parameter; SyncOneBuffer() forwards
 *		its value to FlushBuffer().  BufferSync() is the only caller that
 *		passes true.  BgBufferSync(), BufferAlloc(), FlushRelationBuffers()
 *		and FlushDatabaseBuffers() pass false.  In FlushBuffer(), after the
 *		BUFFER_FLUSH_DONE trace point and before the error context stack is
 *		popped, smgrflush() is called for the buffer's fork and block number
 *		when both flush_to_disk and sync_on_checkpoint_flush are true.
 *
 *	smgr.c / smgr.h
 *		Add an smgr_flush function pointer to f_smgr (between smgr_write and
 *		smgr_nblocks), populate it with mdflush in smgrsw[], and add
 *		smgrflush(), which dispatches through smgrsw[reln->smgr_which].
 *
 *	md.c
 *		Add mdflush().  It looks up the segment with _mdfd_getseg() using
 *		EXTENSION_FAIL, computes the block's byte offset inside that segment
 *		as BLCKSZ * (blocknum % RELSEG_SIZE), and calls FileFlush() for BLCKSZ
 *		bytes.  A negative return from FileFlush() is reported with
 *		ereport(ERROR, ...).
 *
 *	fd.c / fd.h
 *		Add FileFlush(File, off_t offset, int amount).  When __linux__ is
 *		defined it asserts the File is valid, emits the DO_DB trace, calls
 *		FileAccess() (returning its result if negative), and then returns
 *		the result of sync_file_range(fd, offset, amount,
 *		SYNC_FILE_RANGE_WRITE).  On every other platform it only asserts
 *		the File is valid and returns 0.
 *
 *	guc.c
 *		Register "sync_on_checkpoint_flush" in ConfigureNamesBool[] as a
 *		PGC_POSTMASTER setting in group RESOURCES_ASYNCHRONOUS, boot value
 *		false, with no check/assign/show hooks.
 *
 * NOTE(review): the GUC's short description ("preflush data during
 * checkpoints.") starts in lower case and has no long description, unlike
 * the neighbouring update_process_title entry visible in the same hunk --
 * consider aligning the wording.
 *
 * NOTE(review): FileFlush() is gated on __linux__ rather than on a
 * build-time feature test; confirm whether the tree already provides a
 * configure-detected symbol for sync_file_range() that should be used.
 *
 * NOTE(review): on platforms where __linux__ is not defined the setting is
 * accepted but FileFlush() does nothing; confirm that a silent no-op is the
 * intended behaviour.
 *
 * NOTE(review): mdflush() raises ERROR when the flush request fails, and it
 * is reached from BufferSync(); confirm that aborting the caller is the
 * intended policy for a failed writeback request.
 *
 * NOTE(review): PGC_POSTMASTER means the value can only change at server
 * start -- presumably a reloadable context would also work; verify.
 */
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 4576970..eab30a6 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -78,6 +78,7 @@ bool		zero_damaged_pages = false;
 int			bgwriter_lru_maxpages = 100;
 double		bgwriter_lru_multiplier = 2.0;
 bool		track_io_timing = false;
+bool		sync_on_checkpoint_flush = false;
 
 /*
  * How many buffers PrefetchBuffer callers should try to stay ahead of their
@@ -356,7 +357,7 @@ static bool PinBuffer(volatile BufferDesc *buf, BufferAccessStrategy strategy);
 static void PinBuffer_Locked(volatile BufferDesc *buf);
 static void UnpinBuffer(volatile BufferDesc *buf, bool fixOwner);
 static void BufferSync(int flags);
-static int	SyncOneBuffer(int buf_id, bool skip_recently_used);
+static int	SyncOneBuffer(int buf_id, bool skip_recently_used, bool flush_to_disk);
 static void WaitIO(volatile BufferDesc *buf);
 static bool StartBufferIO(volatile BufferDesc *buf, bool forInput);
 static void TerminateBufferIO(volatile BufferDesc *buf, bool clear_dirty,
@@ -369,7 +370,7 @@ static volatile BufferDesc *BufferAlloc(SMgrRelation smgr,
 			BlockNumber blockNum,
 			BufferAccessStrategy strategy,
 			bool *foundPtr);
-static void FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln);
+static void FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln, bool flush_to_disk);
 static void AtProcExit_Buffers(int code, Datum arg);
 static void CheckForBufferLeaks(void);
 static int	rnode_comparator(const void *p1, const void *p2);
@@ -947,7 +948,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
 												smgr->smgr_rnode.node.dbNode,
 											  smgr->smgr_rnode.node.relNode);
 
-				FlushBuffer(buf, NULL);
+				FlushBuffer(buf, NULL, false);
 				LWLockRelease(buf->content_lock);
 
 				TRACE_POSTGRESQL_BUFFER_WRITE_DIRTY_DONE(forkNum, blockNum,
@@ -1573,7 +1574,7 @@ BufferSync(int flags)
 		 */
 		if (bufHdr->flags & BM_CHECKPOINT_NEEDED)
 		{
-			if (SyncOneBuffer(buf_id, false) & BUF_WRITTEN)
+			if (SyncOneBuffer(buf_id, false, true) & BUF_WRITTEN)
 			{
 				TRACE_POSTGRESQL_BUFFER_SYNC_WRITTEN(buf_id);
 				BgWriterStats.m_buf_written_checkpoints++;
@@ -1850,7 +1851,7 @@ BgBufferSync(void)
 	/* Execute the LRU scan */
 	while (num_to_scan > 0 && reusable_buffers < upcoming_alloc_est)
 	{
-		int			buffer_state = SyncOneBuffer(next_to_clean, true);
+		int			buffer_state = SyncOneBuffer(next_to_clean, true, false);
 
 		if (++next_to_clean >= NBuffers)
 		{
@@ -1927,7 +1928,7 @@ BgBufferSync(void)
  * Note: caller must have done ResourceOwnerEnlargeBuffers.
  */
 static int
-SyncOneBuffer(int buf_id, bool skip_recently_used)
+SyncOneBuffer(int buf_id, bool skip_recently_used, bool flush_to_disk)
 {
 	volatile BufferDesc *bufHdr = &BufferDescriptors[buf_id];
 	int			result = 0;
@@ -1966,7 +1967,7 @@ SyncOneBuffer(int buf_id, bool skip_recently_used)
 	PinBuffer_Locked(bufHdr);
 	LWLockAcquire(bufHdr->content_lock, LW_SHARED);
 
-	FlushBuffer(bufHdr, NULL);
+	FlushBuffer(bufHdr, NULL, flush_to_disk);
 
 	LWLockRelease(bufHdr->content_lock);
 	UnpinBuffer(bufHdr, true);
@@ -2230,7 +2231,7 @@ BufferGetTag(Buffer buffer, RelFileNode *rnode, ForkNumber *forknum,
  * as the second parameter. If not, pass NULL.
  */
 static void
-FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln)
+FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln, bool flush_to_disk)
 {
 	XLogRecPtr	recptr;
 	ErrorContextCallback errcallback;
@@ -2342,6 +2343,10 @@ FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln)
 									   reln->smgr_rnode.node.spcNode,
 									   reln->smgr_rnode.node.dbNode,
 									   reln->smgr_rnode.node.relNode);
+	if (flush_to_disk && sync_on_checkpoint_flush)
+		smgrflush(reln,
+				  buf->tag.forkNum,
+				  buf->tag.blockNum);
 
 	/* Pop the error context stack */
 	error_context_stack = errcallback.previous;
@@ -2769,7 +2774,7 @@ FlushRelationBuffers(Relation rel)
 		{
 			PinBuffer_Locked(bufHdr);
 			LWLockAcquire(bufHdr->content_lock, LW_SHARED);
-			FlushBuffer(bufHdr, rel->rd_smgr);
+			FlushBuffer(bufHdr, rel->rd_smgr, false);
 			LWLockRelease(bufHdr->content_lock);
 			UnpinBuffer(bufHdr, true);
 		}
@@ -2819,7 +2824,7 @@ FlushDatabaseBuffers(Oid dbid)
 		{
 			PinBuffer_Locked(bufHdr);
 			LWLockAcquire(bufHdr->content_lock, LW_SHARED);
-			FlushBuffer(bufHdr, NULL);
+			FlushBuffer(bufHdr, NULL, false);
 			LWLockRelease(bufHdr->content_lock);
 			UnpinBuffer(bufHdr, true);
 		}
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index 1f69c9e..fead234 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -1417,6 +1417,32 @@ retry:
 }
 
 int
+FileFlush(File file, off_t offset, int amount)
+{
+#ifdef __linux__
+	int			returnCode;
+
+	Assert(FileIsValid(file));
+
+	DO_DB(elog(LOG, "FileFlush: %d (%s) " INT64_FORMAT " %d",
+			   file, VfdCache[file].fileName,
+			   (int64) offset, amount));
+
+	returnCode = FileAccess(file);
+	if (returnCode < 0)
+		return returnCode;
+
+	returnCode = sync_file_range(VfdCache[file].fd, offset, amount,
+								 SYNC_FILE_RANGE_WRITE);
+
+	return returnCode;
+#else
+	Assert(FileIsValid(file));
+	return 0;
+#endif
+}
+
+int
 FileSync(File file)
 {
 	int			returnCode;
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
index 167d61c..54cefea 100644
--- a/src/backend/storage/smgr/md.c
+++ b/src/backend/storage/smgr/md.c
@@ -800,6 +800,28 @@ mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 }
 
 /*
+ *	mdflush() -- Request writeback of one block of a relation via FileFlush().
+ */
+void
+mdflush(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
+{
+	off_t		seekpos;
+	MdfdVec    *v;
+
+	v = _mdfd_getseg(reln, forknum, blocknum, false, EXTENSION_FAIL);
+
+	seekpos = (off_t) BLCKSZ *(blocknum % ((BlockNumber) RELSEG_SIZE));
+
+	if (FileFlush(v->mdfd_vfd, seekpos, BLCKSZ) < 0)
+	{
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not flush block %u in file \"%s\": %m",
+						blocknum, FilePathName(v->mdfd_vfd))));
+	}
+}
+
+/*
  *	mdnblocks() -- Get the number of blocks stored in a relation.
  *
  *		Important side effect: all active segments of the relation are opened
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index d16f559..bf6bc70 100644
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -53,6 +53,8 @@ typedef struct f_smgr
 										  BlockNumber blocknum, char *buffer);
 	void		(*smgr_write) (SMgrRelation reln, ForkNumber forknum,
 						 BlockNumber blocknum, char *buffer, bool skipFsync);
+	void		(*smgr_flush) (SMgrRelation reln, ForkNumber forknum,
+										   BlockNumber blocknum);
 	BlockNumber (*smgr_nblocks) (SMgrRelation reln, ForkNumber forknum);
 	void		(*smgr_truncate) (SMgrRelation reln, ForkNumber forknum,
 											  BlockNumber nblocks);
@@ -66,7 +68,7 @@ typedef struct f_smgr
 static const f_smgr smgrsw[] = {
 	/* magnetic disk */
 	{mdinit, NULL, mdclose, mdcreate, mdexists, mdunlink, mdextend,
-		mdprefetch, mdread, mdwrite, mdnblocks, mdtruncate, mdimmedsync,
+		mdprefetch, mdread, mdwrite, mdflush, mdnblocks, mdtruncate, mdimmedsync,
 		mdpreckpt, mdsync, mdpostckpt
 	}
 };
@@ -651,6 +653,16 @@ smgrwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
 }
 
 /*
+ *	smgrflush() -- Pass a one-block writeback request to the smgr_flush routine.
+ */
+void
+smgrflush(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
+{
+	(*(smgrsw[reln->smgr_which].smgr_flush)) (reln, forknum, blocknum);
+}
+
+
+/*
  *	smgrnblocks() -- Calculate the number of blocks in the
  *					 supplied relation.
  */
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index a8a17c2..0316b52 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -1083,6 +1083,16 @@ static struct config_bool ConfigureNamesBool[] =
 	},
 
 	{
+		{"sync_on_checkpoint_flush", PGC_POSTMASTER, RESOURCES_ASYNCHRONOUS,
+			gettext_noop("preflush data during checkpoints."),
+			NULL
+		},
+		&sync_on_checkpoint_flush,
+		false,
+		NULL, NULL, NULL
+	},
+
+	{
 		{"update_process_title", PGC_SUSET, STATS_COLLECTOR,
 			gettext_noop("Updates the process title to show the active SQL command."),
 			gettext_noop("Enables updating of the process title every time a new SQL command is received by the server.")
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
index 42d9120..ba6a4b1 100644
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -52,6 +52,7 @@ extern int	bgwriter_lru_maxpages;
 extern double bgwriter_lru_multiplier;
 extern bool track_io_timing;
 extern int	target_prefetch_pages;
+extern bool sync_on_checkpoint_flush;
 
 /* in buf_init.c */
 extern PGDLLIMPORT char *BufferBlocks;
diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h
index a6df8fb..f7dff39 100644
--- a/src/include/storage/fd.h
+++ b/src/include/storage/fd.h
@@ -71,6 +71,7 @@ extern void FileClose(File file);
 extern int	FilePrefetch(File file, off_t offset, int amount);
 extern int	FileRead(File file, char *buffer, int amount);
 extern int	FileWrite(File file, char *buffer, int amount);
+extern int	FileFlush(File file, off_t offset, int amount);
 extern int	FileSync(File file);
 extern off_t FileSeek(File file, off_t offset, int whence);
 extern int	FileTruncate(File file, off_t offset);
diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h
index ba7c909..6cd71ac 100644
--- a/src/include/storage/smgr.h
+++ b/src/include/storage/smgr.h
@@ -96,6 +96,9 @@ extern void smgrread(SMgrRelation reln, ForkNumber forknum,
 		 BlockNumber blocknum, char *buffer);
 extern void smgrwrite(SMgrRelation reln, ForkNumber forknum,
 		  BlockNumber blocknum, char *buffer, bool skipFsync);
+extern void smgrflush(SMgrRelation reln, ForkNumber forknum,
+					  BlockNumber blocknum);
+
 extern BlockNumber smgrnblocks(SMgrRelation reln, ForkNumber forknum);
 extern void smgrtruncate(SMgrRelation reln, ForkNumber forknum,
 			 BlockNumber nblocks);
@@ -122,6 +125,8 @@ extern void mdread(SMgrRelation reln, ForkNumber forknum,
 	   BlockNumber blocknum, char *buffer);
 extern void mdwrite(SMgrRelation reln, ForkNumber forknum,
 		BlockNumber blocknum, char *buffer, bool skipFsync);
+extern void mdflush(SMgrRelation reln, ForkNumber forknum,
+					BlockNumber blocknum);
 extern BlockNumber mdnblocks(SMgrRelation reln, ForkNumber forknum);
 extern void mdtruncate(SMgrRelation reln, ForkNumber forknum,
 		   BlockNumber nblocks);