diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 4576970..eab30a6 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -78,6 +78,7 @@ bool zero_damaged_pages = false;
int bgwriter_lru_maxpages = 100;
double bgwriter_lru_multiplier = 2.0;
bool track_io_timing = false;
+bool sync_on_checkpoint_flush = false;	/* GUC: when set, FlushBuffer() also calls smgrflush() for callers passing flush_to_disk */
/*
* How many buffers PrefetchBuffer callers should try to stay ahead of their
@@ -356,7 +357,7 @@ static bool PinBuffer(volatile BufferDesc *buf, BufferAccessStrategy strategy);
static void PinBuffer_Locked(volatile BufferDesc *buf);
static void UnpinBuffer(volatile BufferDesc *buf, bool fixOwner);
static void BufferSync(int flags);
-static int SyncOneBuffer(int buf_id, bool skip_recently_used);
+static int SyncOneBuffer(int buf_id, bool skip_recently_used, bool flush_to_disk);
static void WaitIO(volatile BufferDesc *buf);
static bool StartBufferIO(volatile BufferDesc *buf, bool forInput);
static void TerminateBufferIO(volatile BufferDesc *buf, bool clear_dirty,
@@ -369,7 +370,7 @@ static volatile BufferDesc *BufferAlloc(SMgrRelation smgr,
BlockNumber blockNum,
BufferAccessStrategy strategy,
bool *foundPtr);
-static void FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln);
+static void FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln, bool flush_to_disk);
static void AtProcExit_Buffers(int code, Datum arg);
static void CheckForBufferLeaks(void);
static int rnode_comparator(const void *p1, const void *p2);
@@ -947,7 +948,7 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, ForkNumber forkNum,
smgr->smgr_rnode.node.dbNode,
smgr->smgr_rnode.node.relNode);
- FlushBuffer(buf, NULL);
+ FlushBuffer(buf, NULL, false);
LWLockRelease(buf->content_lock);
TRACE_POSTGRESQL_BUFFER_WRITE_DIRTY_DONE(forkNum, blockNum,
@@ -1573,7 +1574,7 @@ BufferSync(int flags)
*/
if (bufHdr->flags & BM_CHECKPOINT_NEEDED)
{
- if (SyncOneBuffer(buf_id, false) & BUF_WRITTEN)
+ if (SyncOneBuffer(buf_id, false, true) & BUF_WRITTEN)
{
TRACE_POSTGRESQL_BUFFER_SYNC_WRITTEN(buf_id);
BgWriterStats.m_buf_written_checkpoints++;
@@ -1850,7 +1851,7 @@ BgBufferSync(void)
/* Execute the LRU scan */
while (num_to_scan > 0 && reusable_buffers < upcoming_alloc_est)
{
- int buffer_state = SyncOneBuffer(next_to_clean, true);
+ int buffer_state = SyncOneBuffer(next_to_clean, true, false);
if (++next_to_clean >= NBuffers)
{
@@ -1927,7 +1928,7 @@ BgBufferSync(void)
* Note: caller must have done ResourceOwnerEnlargeBuffers.
*/
static int
-SyncOneBuffer(int buf_id, bool skip_recently_used)
+SyncOneBuffer(int buf_id, bool skip_recently_used, bool flush_to_disk)
{
volatile BufferDesc *bufHdr = &BufferDescriptors[buf_id];
int result = 0;
@@ -1966,7 +1967,7 @@ SyncOneBuffer(int buf_id, bool skip_recently_used)
PinBuffer_Locked(bufHdr);
LWLockAcquire(bufHdr->content_lock, LW_SHARED);
- FlushBuffer(bufHdr, NULL);
+ FlushBuffer(bufHdr, NULL, flush_to_disk);
LWLockRelease(bufHdr->content_lock);
UnpinBuffer(bufHdr, true);
@@ -2230,7 +2231,7 @@ BufferGetTag(Buffer buffer, RelFileNode *rnode, ForkNumber *forknum,
* as the second parameter. If not, pass NULL.
*/
static void
-FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln)
+FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln, bool flush_to_disk)
{
XLogRecPtr recptr;
ErrorContextCallback errcallback;
@@ -2342,6 +2343,10 @@ FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln)
reln->smgr_rnode.node.spcNode,
reln->smgr_rnode.node.dbNode,
reln->smgr_rnode.node.relNode);
+ if (flush_to_disk && sync_on_checkpoint_flush)
+ smgrflush(reln,
+ buf->tag.forkNum,
+ buf->tag.blockNum);
/* Pop the error context stack */
error_context_stack = errcallback.previous;
@@ -2769,7 +2774,7 @@ FlushRelationBuffers(Relation rel)
{
PinBuffer_Locked(bufHdr);
LWLockAcquire(bufHdr->content_lock, LW_SHARED);
- FlushBuffer(bufHdr, rel->rd_smgr);
+ FlushBuffer(bufHdr, rel->rd_smgr, false);
LWLockRelease(bufHdr->content_lock);
UnpinBuffer(bufHdr, true);
}
@@ -2819,7 +2824,7 @@ FlushDatabaseBuffers(Oid dbid)
{
PinBuffer_Locked(bufHdr);
LWLockAcquire(bufHdr->content_lock, LW_SHARED);
- FlushBuffer(bufHdr, NULL);
+ FlushBuffer(bufHdr, NULL, false);
LWLockRelease(bufHdr->content_lock);
UnpinBuffer(bufHdr, true);
}
diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c
index 1f69c9e..fead234 100644
--- a/src/backend/storage/file/fd.c
+++ b/src/backend/storage/file/fd.c
@@ -1417,6 +1417,43 @@ retry:
}
+/*
+ * FileFlush -- hint that a range of a file should be written out soon
+ *
+ * Where sync_file_range() is available (HAVE_SYNC_FILE_RANGE), ask the
+ * kernel to initiate write-out of "amount" bytes starting at "offset" using
+ * SYNC_FILE_RANGE_WRITE.  Elsewhere this does nothing and reports success.
+ *
+ * Returns 0 on success.  A negative result means that FileAccess() or
+ * sync_file_range() failed.
+ */
int
+FileFlush(File file, off_t offset, int amount)
+{
+#ifdef HAVE_SYNC_FILE_RANGE
+	int			returnCode;
+
+	Assert(FileIsValid(file));
+
+	DO_DB(elog(LOG, "FileFlush: %d (%s) " INT64_FORMAT " %d",
+			   file, VfdCache[file].fileName,
+			   (int64) offset, amount));
+
+	/* Give up before touching the descriptor if the file can't be accessed. */
+	returnCode = FileAccess(file);
+	if (returnCode < 0)
+		return returnCode;
+
+	/* Only initiate write-out of the range; see sync_file_range(2). */
+	return sync_file_range(VfdCache[file].fd, offset, amount,
+						   SYNC_FILE_RANGE_WRITE);
+#else
+	/* No write-out primitive on this platform: the hint is a no-op. */
+	Assert(FileIsValid(file));
+	return 0;
+#endif
+}
+
+int
FileSync(File file)
{
int returnCode;
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
index 167d61c..54cefea 100644
--- a/src/backend/storage/smgr/md.c
+++ b/src/backend/storage/smgr/md.c
@@ -800,6 +800,34 @@ mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
}
/*
+ *	mdflush() -- Ask for one block of a relation to be written out to disk.
+ *
+ *		Obtains the segment descriptor for blocknum via _mdfd_getseg(),
+ *		computes the block's byte offset within that segment, and hands the
+ *		BLCKSZ-byte range to FileFlush().  A negative result is an ERROR.
+ */
+void
+mdflush(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
+{
+	MdfdVec    *seg;
+	off_t		offset;
+
+	seg = _mdfd_getseg(reln, forknum, blocknum, false, EXTENSION_FAIL);
+
+	/* Byte offset: block index modulo RELSEG_SIZE, scaled by BLCKSZ. */
+	offset = (off_t) BLCKSZ * (blocknum % ((BlockNumber) RELSEG_SIZE));
+
+	/* Nothing more to do unless the flush request itself failed. */
+	if (FileFlush(seg->mdfd_vfd, offset, BLCKSZ) >= 0)
+		return;
+
+	ereport(ERROR,
+			(errcode_for_file_access(),
+			 errmsg("could not flush block %u in file \"%s\": %m",
+					blocknum, FilePathName(seg->mdfd_vfd))));
+}
+
+/*
* mdnblocks() -- Get the number of blocks stored in a relation.
*
* Important side effect: all active segments of the relation are opened
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index d16f559..bf6bc70 100644
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -53,6 +53,8 @@ typedef struct f_smgr
BlockNumber blocknum, char *buffer);
void (*smgr_write) (SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, char *buffer, bool skipFsync);
+ void (*smgr_flush) (SMgrRelation reln, ForkNumber forknum,
+ BlockNumber blocknum);
BlockNumber (*smgr_nblocks) (SMgrRelation reln, ForkNumber forknum);
void (*smgr_truncate) (SMgrRelation reln, ForkNumber forknum,
BlockNumber nblocks);
@@ -66,7 +68,7 @@ typedef struct f_smgr
static const f_smgr smgrsw[] = {
/* magnetic disk */
{mdinit, NULL, mdclose, mdcreate, mdexists, mdunlink, mdextend,
- mdprefetch, mdread, mdwrite, mdnblocks, mdtruncate, mdimmedsync,
+ mdprefetch, mdread, mdwrite, mdflush, mdnblocks, mdtruncate, mdimmedsync,
mdpreckpt, mdsync, mdpostckpt
}
};
@@ -651,6 +653,22 @@ smgrwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
}
/*
+ *	smgrflush() -- Trigger the dirty OS buffer for a block to be written
+ *				   out to disk.
+ *
+ *		Forwards the request to the smgr_flush routine of the storage
+ *		manager selected by reln->smgr_which.
+ */
+void
+smgrflush(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum)
+{
+	const f_smgr *mgr = &smgrsw[reln->smgr_which];
+
+	mgr->smgr_flush(reln, forknum, blocknum);
+}
+
+
+/*
* smgrnblocks() -- Calculate the number of blocks in the
* supplied relation.
*/
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index a8a17c2..0316b52 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -1083,6 +1083,16 @@ static struct config_bool ConfigureNamesBool[] =
},
{
+		{"sync_on_checkpoint_flush", PGC_POSTMASTER, RESOURCES_ASYNCHRONOUS,
+			gettext_noop("Requests operating-system write-out of each buffer written during a checkpoint."),
+			gettext_noop("Takes effect only where the server can ask the kernel to start writing a file range; elsewhere the request is a no-op.")
+		},
+		&sync_on_checkpoint_flush,
+		false,
+		NULL, NULL, NULL
+	},
+
+ {
{"update_process_title", PGC_SUSET, STATS_COLLECTOR,
gettext_noop("Updates the process title to show the active SQL command."),
gettext_noop("Enables updating of the process title every time a new SQL command is received by the server.")
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
index 42d9120..ba6a4b1 100644
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -52,6 +52,7 @@ extern int bgwriter_lru_maxpages;
extern double bgwriter_lru_multiplier;
extern bool track_io_timing;
extern int target_prefetch_pages;
+extern bool sync_on_checkpoint_flush;
/* in buf_init.c */
extern PGDLLIMPORT char *BufferBlocks;
diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h
index a6df8fb..f7dff39 100644
--- a/src/include/storage/fd.h
+++ b/src/include/storage/fd.h
@@ -71,6 +71,7 @@ extern void FileClose(File file);
extern int FilePrefetch(File file, off_t offset, int amount);
extern int FileRead(File file, char *buffer, int amount);
extern int FileWrite(File file, char *buffer, int amount);
+extern int FileFlush(File file, off_t offset, int amount);
extern int FileSync(File file);
extern off_t FileSeek(File file, off_t offset, int whence);
extern int FileTruncate(File file, off_t offset);
diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h
index ba7c909..6cd71ac 100644
--- a/src/include/storage/smgr.h
+++ b/src/include/storage/smgr.h
@@ -96,6 +96,9 @@ extern void smgrread(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, char *buffer);
extern void smgrwrite(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, char *buffer, bool skipFsync);
+extern void smgrflush(SMgrRelation reln, ForkNumber forknum,
+ BlockNumber blocknum);
+
extern BlockNumber smgrnblocks(SMgrRelation reln, ForkNumber forknum);
extern void smgrtruncate(SMgrRelation reln, ForkNumber forknum,
BlockNumber nblocks);
@@ -122,6 +125,8 @@ extern void mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
char *buffer);
extern void mdwrite(SMgrRelation reln, ForkNumber forknum,
BlockNumber blocknum, char *buffer, bool skipFsync);
+extern void mdflush(SMgrRelation reln, ForkNumber forknum,
+ BlockNumber blocknum);
extern BlockNumber mdnblocks(SMgrRelation reln, ForkNumber forknum);
extern void mdtruncate(SMgrRelation reln, ForkNumber forknum,
BlockNumber nblocks);