diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 938c554..0f2e4e0 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -1217,6 +1217,12 @@ BufferSync(int flags)
 	int			num_to_write;
 	int			num_written;
 	int			mask = BM_DIRTY;
+	RelFileNode target_rnode = { 0, 0, 0 };
+	ForkNumber	target_forkNum = InvalidForkNumber;
+	int			target_segno = 0;
+	bool		has_target = false;
+	int			outer_num_to_scan = 0;
+	int			outer_buf_id = 0;

 	/* Make sure we can handle the pin inside SyncOneBuffer */
 	ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
@@ -1281,10 +1287,30 @@ BufferSync(int flags)
 	buf_id = StrategySyncStart(NULL, NULL);
 	num_to_scan = NBuffers;
 	num_written = 0;
-	while (num_to_scan-- > 0)
+
+	for (;;)
 	{
 		volatile BufferDesc *bufHdr = &BufferDescriptors[buf_id];

+		if (num_to_scan == 0)
+		{
+			if (has_target)
+			{
+				/*
+				 * We have finished writing out buffers belonging to this
+				 * relation segment. fsync it now.
+				 */
+				mdsync_seg(target_rnode, target_forkNum, target_segno);
+
+				/* continue the outer scan where we left */
+				num_to_scan = outer_num_to_scan;
+				buf_id = outer_buf_id;
+				has_target = false;
+			}
+			else
+				break; /* all done! */
+		}
+
 		/*
 		 * We don't need to acquire the lock here, because we're only looking
 		 * at a single bit. It's possible that someone else writes the buffer
@@ -1299,7 +1325,36 @@ BufferSync(int flags)
 		 */
 		if (bufHdr->flags & BM_CHECKPOINT_NEEDED)
 		{
-			if (SyncOneBuffer(buf_id, false) & BUF_WRITTEN)
+			RelFileNode this_rnode = bufHdr->tag.rnode;
+			ForkNumber	this_forkNum = bufHdr->tag.forkNum;
+			int			this_segno = bufHdr->tag.blockNum / (RELSEG_SIZE / BLCKSZ);
+			bool		skip = false;
+
+			if (has_target)
+			{
+				if (!RelFileNodeEquals(this_rnode, target_rnode) ||
+					this_forkNum != target_forkNum ||
+					this_segno != target_segno)
+				{
+					/*
+					 * This buffer belongs to another relation than the one
+					 * we're targeting right now. We'll revisit it later.
+					 */
+					skip = true;
+				}
+			}
+			else
+			{
+				target_rnode = this_rnode;
+				target_forkNum = this_forkNum;
+				target_segno = this_segno;
+				has_target = true;
+				/* remember where we left the outer scan */
+				outer_buf_id = buf_id;
+				outer_num_to_scan = num_to_scan;
+			}
+
+			if (!skip && SyncOneBuffer(buf_id, false) & BUF_WRITTEN)
 			{
 				TRACE_POSTGRESQL_BUFFER_SYNC_WRITTEN(buf_id);
 				BgWriterStats.m_buf_written_checkpoints++;
@@ -1328,6 +1383,7 @@ BufferSync(int flags)

 		if (++buf_id >= NBuffers)
 			buf_id = 0;
+		num_to_scan--;
 	}

 	/*
diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c
index 167d61c..9dedef9 100644
--- a/src/backend/storage/smgr/md.c
+++ b/src/backend/storage/smgr/md.c
@@ -1257,14 +1257,192 @@ mdsync(void)
 	}							/* end loop over hashtable entries */

 	/* Return sync performance metrics for report at checkpoint end */
-	CheckpointStats.ckpt_sync_rels = processed;
-	CheckpointStats.ckpt_longest_sync = longest;
-	CheckpointStats.ckpt_agg_sync_time = total_elapsed;
+	CheckpointStats.ckpt_sync_rels += processed;
+	if (longest > CheckpointStats.ckpt_longest_sync)
+		CheckpointStats.ckpt_longest_sync = longest;
+	CheckpointStats.ckpt_agg_sync_time += total_elapsed;

 	/* Flag successful completion of mdsync */
 	mdsync_in_progress = false;
 }

+
+/*
+ *	mdsync_seg() -- Sync previous writes to a particular relation segment to stable storage.
+ */
+void
+mdsync_seg(RelFileNode rnode, ForkNumber forknum, int segno)
+{
+	PendingOperationEntry *entry;
+
+	/* Statistics on sync times */
+	int			processed = 0;
+	instr_time	sync_start,
+				sync_end,
+				sync_diff;
+	uint64		elapsed;
+	uint64		longest = 0;
+	uint64		total_elapsed = 0;
+
+	/*
+	 * This is only called during checkpoints, and checkpoints should only
+	 * occur in processes that have created a pendingOpsTable.
+	 */
+	if (!pendingOpsTable)
+		elog(ERROR, "cannot sync without a pendingOpsTable");
+
+	/*
+	 * If fsync is off, do nothing. (We'll clear the pendingOpsTable later,
+	 * in mdsync)
+	 */
+	if (!enableFsync)
+		return;
+
+	/*
+	 * If we are in the checkpointer, the sync had better include all fsync
+	 * requests that were queued by backends up to this point.  The tightest
+	 * race condition that could occur is that a buffer that must be written
+	 * and fsync'd for the checkpoint could have been dumped by a backend just
+	 * before it was visited by BufferSync().  We know the backend will have
+	 * queued an fsync request before clearing the buffer's dirtybit, so we
+	 * are safe as long as we do an Absorb after completing BufferSync().
+	 */
+	AbsorbFsyncRequests();
+
+	/* Now scan the hashtable for fsync requests to process */
+	entry = (PendingOperationEntry *) hash_search(pendingOpsTable,
+												  &rnode,
+												  HASH_FIND,
+												  NULL);
+	if (entry)
+	{
+		if (bms_is_member(segno, entry->requests[forknum]))
+		{
+			int			failures;
+
+			/*
+			 * The fsync table could contain requests to fsync segments
+			 * that have been deleted (unlinked) by the time we get to
+			 * them. Rather than just hoping an ENOENT (or EACCES on
+			 * Windows) error can be ignored, what we do on error is
+			 * absorb pending requests and then retry.  Since mdunlink()
+			 * queues a "cancel" message before actually unlinking, the
+			 * fsync request is guaranteed to be marked canceled after the
+			 * absorb if it really was this case. DROP DATABASE likewise
+			 * has to tell us to forget fsync requests before it starts
+			 * deletions.
+			 */
+			for (failures = 0;; failures++) /* loop exits at "break" */
+			{
+				SMgrRelation reln;
+				MdfdVec    *seg;
+				char	   *path;
+				int			save_errno;
+
+				/*
+				 * Find or create an smgr hash entry for this relation.
+				 * This may seem a bit unclean -- md calling smgr?	But
+				 * it's really the best solution.  It ensures that the
+				 * open file reference isn't permanently leaked if we get
+				 * an error here. (You may say "but an unreferenced
+				 * SMgrRelation is still a leak!" Not really, because the
+				 * only case in which a checkpoint is done by a process
+				 * that isn't about to shut down is in the checkpointer,
+				 * and it will periodically do smgrcloseall(). This fact
+				 * justifies our not closing the reln in the success path
+				 * either, which is a good thing since in non-checkpointer
+				 * cases we couldn't safely do that.)
+				 */
+				reln = smgropen(entry->rnode, InvalidBackendId);
+
+				/* Attempt to open and fsync the target segment */
+				seg = _mdfd_getseg(reln, forknum,
+							 (BlockNumber) segno * (BlockNumber) RELSEG_SIZE,
+								   false, EXTENSION_RETURN_NULL);
+
+				INSTR_TIME_SET_CURRENT(sync_start);
+
+				if (seg != NULL &&
+					FileSync(seg->mdfd_vfd) >= 0)
+				{
+					/* Success; update statistics about sync timing */
+					INSTR_TIME_SET_CURRENT(sync_end);
+					sync_diff = sync_end;
+					INSTR_TIME_SUBTRACT(sync_diff, sync_start);
+					elapsed = INSTR_TIME_GET_MICROSEC(sync_diff);
+					if (elapsed > longest)
+						longest = elapsed;
+					total_elapsed += elapsed;
+					processed++;
+					if (log_checkpoints)
+						elog(LOG, "checkpoint sync: number=%d file=%s time=%.3f msec",
+							 processed,
+							 FilePathName(seg->mdfd_vfd),
+							 (double) elapsed / 1000);
+
+					entry->requests[forknum] =
+						bms_del_member(entry->requests[forknum], segno);
+
+					break;	/* out of retry loop */
+				}
+
+				/* Compute file name for use in message */
+				save_errno = errno;
+				path = _mdfd_segpath(reln, forknum, (BlockNumber) segno);
+				errno = save_errno;
+
+				/*
+				 * It is possible that the relation has been dropped or
+				 * truncated since the fsync request was entered.
+				 * Therefore, allow ENOENT, but only if we didn't fail
+				 * already on this file.  This applies both for
+				 * _mdfd_getseg() and for FileSync, since fd.c might have
+				 * closed the file behind our back.
+				 *
+				 * XXX is there any point in allowing more than one retry?
+				 * Don't see one at the moment, but easy to change the
+				 * test here if so.
+				 */
+				if (!FILE_POSSIBLY_DELETED(errno) ||
+					failures > 0)
+					ereport(ERROR,
+							(errcode_for_file_access(),
+							 errmsg("could not fsync file \"%s\": %m",
+									path)));
+				else
+					ereport(DEBUG1,
+							(errcode_for_file_access(),
+						errmsg("could not fsync file \"%s\" but retrying: %m",
+							   path)));
+				pfree(path);
+
+				/*
+				 * Absorb incoming requests and check to see if a cancel
+				 * arrived for this relation fork.
+				 */
+				AbsorbFsyncRequests();
+
+				if (entry->canceled[forknum])
+					break;
+			}				/* end retry loop */
+		}
+
+		/*
+		 * XXX: We don't bother removing the pendingOpsTable entry here, if
+		 * it's now empty. The mdsync() call later in the checkpoint will do
+		 * that.
+		 */
+	}							/* end loop over hashtable entries */
+
+
+	/* Return sync performance metrics for report at checkpoint end */
+	CheckpointStats.ckpt_sync_rels += processed;
+	if (longest > CheckpointStats.ckpt_longest_sync)
+		CheckpointStats.ckpt_longest_sync = longest;
+	CheckpointStats.ckpt_agg_sync_time += total_elapsed;
+}
+
+
 /*
  * mdpreckpt() -- Do pre-checkpoint work
  *
diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h
index ba7c909..ebfa218 100644
--- a/src/include/storage/smgr.h
+++ b/src/include/storage/smgr.h
@@ -128,6 +128,7 @@ extern void mdtruncate(SMgrRelation reln, ForkNumber forknum,
 extern void mdimmedsync(SMgrRelation reln, ForkNumber forknum);
 extern void mdpreckpt(void);
 extern void mdsync(void);
+extern void mdsync_seg(RelFileNode rnode, ForkNumber forknum, int segno);
 extern void mdpostckpt(void);

 extern void SetForwardFsyncRequests(void);