diff --git a/src/backend/access/common/heaptuple.c b/src/backend/access/common/heaptuple.c
index e39b977..acf88de 100644
--- a/src/backend/access/common/heaptuple.c
+++ b/src/backend/access/common/heaptuple.c
@@ -60,7 +60,11 @@
 #include "access/sysattr.h"
 #include "access/tuptoaster.h"
 #include "executor/tuptable.h"
+#include "utils/datum.h"
+#include "utils/pg_lzcompress.h"
 
+/* guc variable for EWT compression ratio*/
+int			wal_update_compression_ratio = 25;
 
 /* Does att's datatype allow packing into the 1-byte-header varlena format? */
 #define ATT_IS_PACKABLE(att) \
@@ -617,6 +621,49 @@ heap_copytuple_with_tuple(HeapTuple src, HeapTuple dest)
 	memcpy((char *) dest->t_data, (char *) src->t_data, src->t_len);
 }
 
+/* ----------------
+ * heap_delta_encode
+ *
+ *		Calculate the delta between two tuples, using pglz. The result is
+ * stored in *encdata. *encdata must point to a PGLZ_header buffer, with at
+ * least PGLZ_MAX_OUTPUT(newtup->t_len) bytes.
+ * ----------------
+ */
+bool
+heap_delta_encode(TupleDesc tupleDesc, HeapTuple oldtup, HeapTuple newtup,
+				  char *encdata, uint32 *enclen)
+{
+	PGLZ_Strategy strategy;
+
+	strategy = *PGLZ_strategy_default;
+	strategy.min_comp_rate = wal_update_compression_ratio;
+
+	return pglz_delta_encode(
+		(char *) newtup->t_data + offsetof(HeapTupleHeaderData, t_bits),
+		newtup->t_len - offsetof(HeapTupleHeaderData, t_bits),
+		(char *) oldtup->t_data + offsetof(HeapTupleHeaderData, t_bits),
+		oldtup->t_len - offsetof(HeapTupleHeaderData, t_bits),
+		encdata, enclen, &strategy
+		);
+}
+
+/* ----------------
+ * heap_delta_decode
+ *
+ *		Decode a tuple using delta-encoded WAL tuple and old tuple version.
+ * ----------------
+ */
+void
+heap_delta_decode(char *encdata, uint32 enclen, HeapTuple oldtup, HeapTuple newtup)
+{
+	return pglz_delta_decode(encdata, enclen,
+							 (char *) newtup->t_data + offsetof(HeapTupleHeaderData, t_bits),
+							 MaxHeapTupleSize - offsetof(HeapTupleHeaderData, t_bits),
+							 &newtup->t_len,
+							 (char *) oldtup->t_data + offsetof(HeapTupleHeaderData, t_bits),
+							 oldtup->t_len - offsetof(HeapTupleHeaderData, t_bits));
+}
+
 /*
  * heap_form_tuple
  *		construct a tuple from the given values[] and isnull[] arrays,
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index fe56318..24c117c 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -70,10 +70,12 @@
 #include "utils/snapmgr.h"
 #include "utils/syscache.h"
 #include "utils/tqual.h"
+#include "utils/pg_lzcompress.h"
 
 
 /* GUC variable */
 bool		synchronize_seqscans = true;
+extern int     wal_update_compression_ratio;
 
 
 static HeapScanDesc heap_beginscan_internal(Relation relation,
@@ -5815,6 +5817,12 @@ log_heap_update(Relation reln, Buffer oldbuf,
 	XLogRecPtr	recptr;
 	XLogRecData rdata[4];
 	Page		page = BufferGetPage(newbuf);
+	char	   *newtupdata;
+	int			newtuplen;
+	bool		compressed = false;
+
+	/* Structure which holds EWT */
+	char		buf[MaxHeapTupleSize];
 
 	/* Caller should not call me on a non-WAL-logged relation */
 	Assert(RelationNeedsWAL(reln));
@@ -5824,15 +5832,47 @@ log_heap_update(Relation reln, Buffer oldbuf,
 	else
 		info = XLOG_HEAP_UPDATE;
 
+	newtupdata = ((char *) newtup->t_data) + offsetof(HeapTupleHeaderData, t_bits);
+	newtuplen = newtup->t_len - offsetof(HeapTupleHeaderData, t_bits);
+
+	/*
+	 * EWT can be generated for all new tuple versions created by Update
+	 * operation. Currently we do it when both the old and new tuple versions
+	 * are on same page, because during recovery if the page containing old
+	 * tuple is corrupt, it should not cascade that corruption to other pages.
+	 * Under the general assumption that for long runs most updates tend to
+	 * create new tuple version on same page, there should not be significant
+	 * impact on WAL reduction or performance.
+	 *
+	 * We should not generate EWT when we need to backup the whole bolck in
+	 * WAL as in that case there is no saving by reduced WAL size.
+	 */
+	if (wal_update_compression_ratio != 0 && (oldbuf == newbuf) && !XLogCheckBufferNeedsBackup(newbuf))
+	{
+		uint32 enclen;
+		/* Delta-encode the new tuple using the old tuple */
+		if (heap_delta_encode(reln->rd_att, oldtup, newtup, buf, &enclen))
+		{
+			compressed = true;
+			newtupdata = buf;
+			newtuplen = enclen;
+		}
+	}
+
+	xlrec.flags = 0;
 	xlrec.target.node = reln->rd_node;
 	xlrec.target.tid = oldtup->t_self;
 	xlrec.old_xmax = HeapTupleHeaderGetRawXmax(oldtup->t_data);
 	xlrec.old_infobits_set = compute_infobits(oldtup->t_data->t_infomask,
 											  oldtup->t_data->t_infomask2);
 	xlrec.new_xmax = HeapTupleHeaderGetRawXmax(newtup->t_data);
-	xlrec.all_visible_cleared = all_visible_cleared;
+	if (all_visible_cleared)
+		xlrec.flags |= XL_HEAP_UPDATE_ALL_VISIBLE_CLEARED;
 	xlrec.newtid = newtup->t_self;
-	xlrec.new_all_visible_cleared = new_all_visible_cleared;
+	if (new_all_visible_cleared)
+		xlrec.flags |= XL_HEAP_UPDATE_NEW_ALL_VISIBLE_CLEARED;
+	if (compressed)
+		xlrec.flags |= XL_HEAP_UPDATE_DELTA_ENCODED;
 
 	rdata[0].data = (char *) &xlrec;
 	rdata[0].len = SizeOfHeapUpdate;
@@ -5859,9 +5899,12 @@ log_heap_update(Relation reln, Buffer oldbuf,
 	rdata[2].buffer_std = true;
 	rdata[2].next = &(rdata[3]);
 
-	/* PG73FORMAT: write bitmap [+ padding] [+ oid] + data */
-	rdata[3].data = (char *) newtup->t_data + offsetof(HeapTupleHeaderData, t_bits);
-	rdata[3].len = newtup->t_len - offsetof(HeapTupleHeaderData, t_bits);
+	/*
+	 * PG73FORMAT: write bitmap [+ padding] [+ oid] + data follows .........
+	 * OR PG93FORMAT [If encoded]: LZ header + Encoded data follows
+	 */
+	rdata[3].data = newtupdata;
+	rdata[3].len = newtuplen;
 	rdata[3].buffer = newbuf;
 	rdata[3].buffer_std = true;
 	rdata[3].next = NULL;
@@ -6671,7 +6714,10 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
 	Page		page;
 	OffsetNumber offnum;
 	ItemId		lp = NULL;
+	HeapTupleData newtup;
+	HeapTupleData oldtup;
 	HeapTupleHeader htup;
+	HeapTupleHeader oldtupdata = NULL;
 	struct
 	{
 		HeapTupleHeaderData hdr;
@@ -6686,7 +6732,7 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
 	 * The visibility map may need to be fixed even if the heap page is
 	 * already up-to-date.
 	 */
-	if (xlrec->all_visible_cleared)
+	if (xlrec->flags & XL_HEAP_UPDATE_ALL_VISIBLE_CLEARED)
 	{
 		Relation	reln = CreateFakeRelcacheEntry(xlrec->target.node);
 		BlockNumber block = ItemPointerGetBlockNumber(&xlrec->target.tid);
@@ -6746,7 +6792,7 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
 	if (PageGetMaxOffsetNumber(page) < offnum || !ItemIdIsNormal(lp))
 		elog(PANIC, "heap_update_redo: invalid lp");
 
-	htup = (HeapTupleHeader) PageGetItem(page, lp);
+	oldtupdata = htup = (HeapTupleHeader) PageGetItem(page, lp);
 
 	htup->t_infomask &= ~(HEAP_XMAX_BITS | HEAP_MOVED);
 	htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
@@ -6764,7 +6810,7 @@ heap_xlog_update(XLogRecPtr lsn, XLogRecord *record, bool hot_update)
 	/* Mark the page as a candidate for pruning */
 	PageSetPrunable(page, record->xl_xid);
 
-	if (xlrec->all_visible_cleared)
+	if (xlrec->flags & XL_HEAP_UPDATE_ALL_VISIBLE_CLEARED)
 		PageClearAllVisible(page);
 
 	/*
@@ -6788,7 +6834,7 @@ newt:;
 	 * The visibility map may need to be fixed even if the heap page is
 	 * already up-to-date.
 	 */
-	if (xlrec->new_all_visible_cleared)
+	if (xlrec->flags & XL_HEAP_UPDATE_NEW_ALL_VISIBLE_CLEARED)
 	{
 		Relation	reln = CreateFakeRelcacheEntry(xlrec->target.node);
 		BlockNumber block = ItemPointerGetBlockNumber(&xlrec->newtid);
@@ -6851,10 +6897,31 @@ newsame:;
 		   SizeOfHeapHeader);
 	htup = &tbuf.hdr;
 	MemSet((char *) htup, 0, sizeof(HeapTupleHeaderData));
-	/* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */
-	memcpy((char *) htup + offsetof(HeapTupleHeaderData, t_bits),
-		   (char *) xlrec + hsize,
-		   newlen);
+
+	/*
+	 * If the record is EWT then decode it.
+	 */
+	if (xlrec->flags & XL_HEAP_UPDATE_DELTA_ENCODED)
+	{
+		/*
+		 * PG93FORMAT: Header + Control byte + history reference (2 - 3)bytes
+		 * + New data (1 byte length + variable data)+ ...
+		 */
+		oldtup.t_data = oldtupdata;
+		oldtup.t_len = ItemIdGetLength(lp);
+		newtup.t_data = htup;
+
+		heap_delta_decode((char *) xlrec + hsize, newlen, &oldtup, &newtup);
+		newlen = newtup.t_len;
+	}
+	else
+	{
+		/* PG73FORMAT: get bitmap [+ padding] [+ oid] + data */
+		memcpy((char *) htup + offsetof(HeapTupleHeaderData, t_bits),
+			   (char *) xlrec + hsize,
+			   newlen);
+	}
+
 	newlen += offsetof(HeapTupleHeaderData, t_bits);
 	htup->t_infomask2 = xlhdr.t_infomask2;
 	htup->t_infomask = xlhdr.t_infomask;
@@ -6870,7 +6937,7 @@ newsame:;
 	if (offnum == InvalidOffsetNumber)
 		elog(PANIC, "heap_update_redo: failed to add tuple");
 
-	if (xlrec->new_all_visible_cleared)
+	if (xlrec->flags & XL_HEAP_UPDATE_NEW_ALL_VISIBLE_CLEARED)
 		PageClearAllVisible(page);
 
 	freespace = PageGetHeapFreeSpace(page);		/* needed to update FSM below */
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 07c68ad..c3a94a2 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -1249,6 +1249,28 @@ begin:;
 }
 
 /*
+ * Determine whether the buffer referenced has to be backed up. Since we don't
+ * yet have the insert lock, fullPageWrites and forcePageWrites could change
+ * later, but will not cause any problem because this function is used only to
+ * identify whether EWT is required for WAL update.
+ */
+bool
+XLogCheckBufferNeedsBackup(Buffer buffer)
+{
+	bool		doPageWrites;
+	Page		page;
+
+	page = BufferGetPage(buffer);
+
+	doPageWrites = XLogCtl->Insert.fullPageWrites || XLogCtl->Insert.forcePageWrites;
+
+	if (doPageWrites && PageGetLSN(page) <= RedoRecPtr)
+		return true;			/* buffer requires backup */
+
+	return false;				/* buffer does not need to be backed up */
+}
+
+/*
  * Determine whether the buffer referenced by an XLogRecData item has to
  * be backed up, and if so fill a BkpBlock struct for it.  In any case
  * save the buffer's LSN at *lsn.
diff --git a/src/backend/utils/adt/pg_lzcompress.c b/src/backend/utils/adt/pg_lzcompress.c
index 66c64c1..35e8206 100644
--- a/src/backend/utils/adt/pg_lzcompress.c
+++ b/src/backend/utils/adt/pg_lzcompress.c
@@ -112,7 +112,7 @@
  *			of identical bytes like trailing spaces) and for bigger ones
  *			our 4K maximum look-back distance is too small.
  *
- *			The compressor creates a table for 8192 lists of positions.
+ *			The compressor creates a table for lists of positions.
  *			For each input position (except the last 3), a hash key is
  *			built from the 4 next input bytes and the position remembered
  *			in the appropriate list. Thus, the table points to linked
@@ -120,7 +120,10 @@
  *			matching strings. This is done on the fly while the input
  *			is compressed into the output area.  Table entries are only
  *			kept for the last 4096 input positions, since we cannot use
- *			back-pointers larger than that anyway.
+ *			back-pointers larger than that anyway.  The size of the hash
+ *			table depends on the size of the input - a larger table has
+ *			a larger startup cost, as it needs to be initialized to zero,
+ *			but reduces the number of hash collisions on long inputs.
  *
  *			For each byte in the input, it's hash key (built from this
  *			byte and the next 3) is used to find the appropriate list
@@ -180,8 +183,7 @@
  * Local definitions
  * ----------
  */
-#define PGLZ_HISTORY_LISTS		8192	/* must be power of 2 */
-#define PGLZ_HISTORY_MASK		(PGLZ_HISTORY_LISTS - 1)
+#define PGLZ_MAX_HISTORY_LISTS	8192	/* must be power of 2 */
 #define PGLZ_HISTORY_SIZE		4096
 #define PGLZ_MAX_MATCH			273
 
@@ -198,9 +200,9 @@
  */
 typedef struct PGLZ_HistEntry
 {
-	struct PGLZ_HistEntry *next;	/* links for my hash key's list */
-	struct PGLZ_HistEntry *prev;
-	int			hindex;			/* my current hash key */
+	int16		next;			/* links for my hash key's list */
+	int16		prev;
+	uint32		hindex;			/* my current hash key */
 	const char *pos;			/* my input position */
 } PGLZ_HistEntry;
 
@@ -241,9 +243,11 @@ const PGLZ_Strategy *const PGLZ_strategy_always = &strategy_always_data;
  * Statically allocated work arrays for history
  * ----------
  */
-static PGLZ_HistEntry *hist_start[PGLZ_HISTORY_LISTS];
-static PGLZ_HistEntry hist_entries[PGLZ_HISTORY_SIZE];
+static int16 hist_start[PGLZ_MAX_HISTORY_LISTS];
+static PGLZ_HistEntry hist_entries[PGLZ_HISTORY_SIZE + 1];
 
+/* Element 0 in hist_entries is unused, and means 'invalid'. */
+#define INVALID_ENTRY			0
 
 /* ----------
  * pglz_hist_idx -
@@ -257,12 +261,40 @@ static PGLZ_HistEntry hist_entries[PGLZ_HISTORY_SIZE];
  * hash keys more.
  * ----------
  */
-#define pglz_hist_idx(_s,_e) (												\
+#define pglz_hist_idx(_s,_e, _mask) (										\
 			((((_e) - (_s)) < 4) ? (int) (_s)[0] :							\
-			 (((_s)[0] << 9) ^ ((_s)[1] << 6) ^								\
-			  ((_s)[2] << 3) ^ (_s)[3])) & (PGLZ_HISTORY_MASK)				\
+			 (((_s)[0] << 6) ^ ((_s)[1] << 4) ^								\
+			  ((_s)[2] << 2) ^ (_s)[3])) & (_mask)							\
 		)
 
+/*
+ * pglz_hash_init and pglz_hash_roll can be use to calculate the hash in
+ * a rolling fashion. First, call pglz_hash_init, with a pointer to the first
+ * byte. Then call pglz_hash_roll for every subsequent byte. After each
+ * pglz_hash_roll() call, hindex holds the (masked) hash of the current byte.
+ *
+ * a,b,c,d are local variables these macros use to store state. These macros
+ * don't check for end-of-buffer like pglz_hist_idx() does, so these cannot be
+ * used on the last 3 bytes of input.
+ */
+#define pglz_hash_init(_p,hindex,a,b,c,d) 									\
+	do {																	\
+			a = 0;															\
+			b = _p[0];														\
+			c = _p[1];														\
+			d = _p[2];														\
+			hindex = (b << 4) ^ (c << 2) ^ d;								\
+	} while (0)
+
+#define pglz_hash_roll(_p,hindex,a,b,c,d,_mask) 							\
+	do {																	\
+		/* subtract old a */												\
+		hindex ^= a;														\
+		/* shift and add byte */											\
+		a = b; b = c; c = d; d = _p[3];										\
+		hindex = ((hindex << 2) ^ d) & (_mask);								\
+	} while (0)
+
 
 /* ----------
  * pglz_hist_add -
@@ -276,32 +308,49 @@ static PGLZ_HistEntry hist_entries[PGLZ_HISTORY_SIZE];
  * _hn and _recycle are modified in the macro.
  * ----------
  */
-#define pglz_hist_add(_hs,_he,_hn,_recycle,_s,_e) \
+#define pglz_hist_add(_hs,_he,_hn,_recycle,_s,_e, _hindex)	\
 do {									\
-			int __hindex = pglz_hist_idx((_s),(_e));						\
-			PGLZ_HistEntry **__myhsp = &(_hs)[__hindex];					\
+			int16 *__myhsp = &(_hs)[_hindex];								\
 			PGLZ_HistEntry *__myhe = &(_he)[_hn];							\
 			if (_recycle) {													\
-				if (__myhe->prev == NULL)									\
+				if (__myhe->prev == INVALID_ENTRY)							\
 					(_hs)[__myhe->hindex] = __myhe->next;					\
 				else														\
-					__myhe->prev->next = __myhe->next;						\
-				if (__myhe->next != NULL)									\
-					__myhe->next->prev = __myhe->prev;						\
+					(_he)[__myhe->prev].next = __myhe->next;				\
+				if (__myhe->next != INVALID_ENTRY)							\
+					(_he)[__myhe->next].prev = __myhe->prev;				\
 			}																\
 			__myhe->next = *__myhsp;										\
-			__myhe->prev = NULL;											\
-			__myhe->hindex = __hindex;										\
+			__myhe->prev = INVALID_ENTRY;									\
+			__myhe->hindex = _hindex;										\
 			__myhe->pos  = (_s);											\
-			if (*__myhsp != NULL)											\
-				(*__myhsp)->prev = __myhe;									\
-			*__myhsp = __myhe;												\
-			if (++(_hn) >= PGLZ_HISTORY_SIZE) {								\
-				(_hn) = 0;													\
+			if (*__myhsp != INVALID_ENTRY)									\
+				(_he)[(*__myhsp)].prev = _hn;								\
+			*__myhsp = _hn;													\
+			if (++(_hn) >= PGLZ_HISTORY_SIZE + 1) {							\
+				(_hn) = 1;													\
 				(_recycle) = true;											\
 			}																\
 } while (0)
 
+/*
+ * An version of pglz_hist_add() that doesn't do recycling. Can be used if
+ * you know the input fits in PGLZ_HISTORY_SIZE.
+ */
+#define pglz_hist_add_no_recycle(_hs,_he,_hn,_s,_e, _hindex)	\
+do {									\
+			int16 *__myhsp = &(_hs)[_hindex];								\
+			PGLZ_HistEntry *__myhe = &(_he)[_hn];							\
+			__myhe->next = *__myhsp;										\
+			__myhe->prev = INVALID_ENTRY;									\
+			__myhe->hindex = _hindex;										\
+			__myhe->pos  = (_s);											\
+			if (*__myhsp != INVALID_ENTRY)									\
+				(_he)[(*__myhsp)].prev = _hn;								\
+			*__myhsp = _hn;													\
+			++(_hn);														\
+} while (0)
+
 
 /* ----------
  * pglz_out_ctrl -
@@ -372,28 +421,42 @@ do { \
  * ----------
  */
 static inline int
-pglz_find_match(PGLZ_HistEntry **hstart, const char *input, const char *end,
-				int *lenp, int *offp, int good_match, int good_drop)
+pglz_find_match(int16 *hstart, const char *input, const char *end,
+				int *lenp, int *offp, int good_match, int good_drop,
+				const char *hend, int hindex)
 {
-	PGLZ_HistEntry *hent;
+	int16		hentno;
 	int32		len = 0;
 	int32		off = 0;
 
 	/*
 	 * Traverse the linked history list until a good enough match is found.
 	 */
-	hent = hstart[pglz_hist_idx(input, end)];
-	while (hent)
+	hentno = hstart[hindex];
+	while (hentno != INVALID_ENTRY)
 	{
+		PGLZ_HistEntry *hent = &hist_entries[hentno];
 		const char *ip = input;
 		const char *hp = hent->pos;
 		int32		thisoff;
 		int32		thislen;
+		int32		maxlen;
+
+		maxlen = PGLZ_MAX_MATCH;
+		if (end - input < maxlen)
+			maxlen = end - input;
+		if (hend && (hend - hp < maxlen))
+			maxlen = hend - hp;
+
 
 		/*
 		 * Stop if the offset does not fit into our tag anymore.
 		 */
-		thisoff = ip - hp;
+		if (!hend)
+			thisoff = ip - hp;
+		else
+			thisoff = hend - hp;
+
 		if (thisoff >= 0x0fff)
 			break;
 
@@ -413,7 +476,7 @@ pglz_find_match(PGLZ_HistEntry **hstart, const char *input, const char *end,
 				thislen = len;
 				ip += len;
 				hp += len;
-				while (ip < end && *ip == *hp && thislen < PGLZ_MAX_MATCH)
+				while (*ip == *hp && thislen < maxlen)
 				{
 					thislen++;
 					ip++;
@@ -423,7 +486,7 @@ pglz_find_match(PGLZ_HistEntry **hstart, const char *input, const char *end,
 		}
 		else
 		{
-			while (ip < end && *ip == *hp && thislen < PGLZ_MAX_MATCH)
+			while (*ip == *hp && thislen < maxlen)
 			{
 				thislen++;
 				ip++;
@@ -443,13 +506,13 @@ pglz_find_match(PGLZ_HistEntry **hstart, const char *input, const char *end,
 		/*
 		 * Advance to the next history entry
 		 */
-		hent = hent->next;
+		hentno = hent->next;
 
 		/*
 		 * Be happy with lesser good matches the more entries we visited. But
 		 * no point in doing calculation if we're at end of list.
 		 */
-		if (hent)
+		if (hentno != INVALID_ENTRY)
 		{
 			if (len >= good_match)
 				break;
@@ -471,6 +534,29 @@ pglz_find_match(PGLZ_HistEntry **hstart, const char *input, const char *end,
 	return 0;
 }
 
+static int
+choose_hash_size(int slen)
+{
+	int hashsz;
+
+	/*
+	 * Experiments suggest that these hash sizes work pretty well. A large
+	 * hash table minimizes collision, but has a higher startup cost. For
+	 * a small input, the startup cost dominates. The table size must be
+	 * a power of two.
+	 */
+	if (slen < 128)
+		hashsz = 512;
+	else if (slen < 256)
+		hashsz = 1024;
+	else if (slen < 512)
+		hashsz = 2048;
+	else if (slen < 1024)
+		hashsz = 4096;
+	else
+		hashsz = 8192;
+	return hashsz;
+}
 
 /* ----------
  * pglz_compress -
@@ -484,7 +570,7 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
 {
 	unsigned char *bp = ((unsigned char *) dest) + sizeof(PGLZ_Header);
 	unsigned char *bstart = bp;
-	int			hist_next = 0;
+	int			hist_next = 1;
 	bool		hist_recycle = false;
 	const char *dp = source;
 	const char *dend = source + slen;
@@ -500,6 +586,8 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
 	int32		result_size;
 	int32		result_max;
 	int32		need_rate;
+	int			hashsz;
+	int			mask;
 
 	/*
 	 * Our fallback strategy is the default.
@@ -555,17 +643,21 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
 	else
 		result_max = (slen * (100 - need_rate)) / 100;
 
+	hashsz = choose_hash_size(slen);
+	mask = hashsz - 1;
+
 	/*
 	 * Initialize the history lists to empty.  We do not need to zero the
 	 * hist_entries[] array; its entries are initialized as they are used.
 	 */
-	memset(hist_start, 0, sizeof(hist_start));
+	memset(hist_start, 0, hashsz * sizeof(int16));
 
 	/*
 	 * Compress the source directly into the output buffer.
 	 */
 	while (dp < dend)
 	{
+		uint32		hindex;
 		/*
 		 * If we already exceeded the maximum result size, fail.
 		 *
@@ -588,8 +680,9 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
 		/*
 		 * Try to find a match in the history
 		 */
+		hindex = pglz_hist_idx(dp, dend, mask);
 		if (pglz_find_match(hist_start, dp, dend, &match_len,
-							&match_off, good_match, good_drop))
+							&match_off, good_match, good_drop, NULL, hindex))
 		{
 			/*
 			 * Create the tag and add history entries for all matched
@@ -598,9 +691,10 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
 			pglz_out_tag(ctrlp, ctrlb, ctrl, bp, match_len, match_off);
 			while (match_len--)
 			{
+				hindex = pglz_hist_idx(dp, dend, mask);
 				pglz_hist_add(hist_start, hist_entries,
 							  hist_next, hist_recycle,
-							  dp, dend);
+							  dp, dend, hindex);
 				dp++;			/* Do not do this ++ in the line above! */
 				/* The macro would do it four times - Jan.	*/
 			}
@@ -614,7 +708,7 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
 			pglz_out_literal(ctrlp, ctrlb, ctrl, bp, *dp);
 			pglz_hist_add(hist_start, hist_entries,
 						  hist_next, hist_recycle,
-						  dp, dend);
+						  dp, dend, hindex);
 			dp++;				/* Do not do this ++ in the line above! */
 			/* The macro would do it four times - Jan.	*/
 		}
@@ -637,6 +731,198 @@ pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
 	return true;
 }
 
+/*
+ * Delta encoding.
+ *
+ * The 'source' is encoded using the same pglz algorithm used for compression.
+ * The difference with pglz_compress is that the back-references refer to
+ * the 'history', instead of earlier offsets in 'source'.
+ *
+ * The encoded result is written to *dest, and its length is returned in
+ * *finallen.
+ */
+bool
+pglz_delta_encode(const char *source, int32 slen,
+				  const char *history, int32 hlen,
+				  char *dest, uint32 *finallen,
+				  const PGLZ_Strategy *strategy)
+{
+	unsigned char *bp = ((unsigned char *) dest);
+	unsigned char *bstart = bp;
+	const char *dp = source;
+	const char *dend = source + slen;
+	const char *hp = history;
+	const char *hend = history + hlen;
+	unsigned char ctrl_dummy = 0;
+	unsigned char *ctrlp = &ctrl_dummy;
+	unsigned char ctrlb = 0;
+	unsigned char ctrl = 0;
+	bool		found_match = false;
+	int32		match_len = 0;
+	int32		match_off;
+	int32		result_size;
+	int32		result_max;
+	int32		good_match;
+	int32		good_drop;
+	int32		need_rate;
+	int			hist_next = 0;
+	int			hashsz;
+	int			mask;
+	int32		a,b,c,d;
+	int32		hindex;
+
+	/*
+	 * Tuples of length greater than PGLZ_HISTORY_SIZE are not allowed for
+	 * delta encode as this is the maximum size of history offset.
+	 */
+	if (hlen >= PGLZ_HISTORY_SIZE || hlen < 4)
+		return false;
+
+	/*
+	 * Our fallback strategy is the default.
+	 */
+	if (strategy == NULL)
+		strategy = PGLZ_strategy_default;
+
+	/*
+	 * If the strategy forbids compression (at all or if source chunk size out
+	 * of range), fail.
+	 */
+	if (strategy->match_size_good <= 0 ||
+		slen < strategy->min_input_size ||
+		slen > strategy->max_input_size)
+		return false;
+
+	need_rate = strategy->min_comp_rate;
+	if (need_rate < 0)
+		need_rate = 0;
+	else if (need_rate > 99)
+		need_rate = 99;
+
+	/*
+	 * Limit the match parameters to the supported range.
+	 */
+	good_match = strategy->match_size_good;
+	if (good_match > PGLZ_MAX_MATCH)
+		good_match = PGLZ_MAX_MATCH;
+	else if (good_match < 17)
+		good_match = 17;
+
+	good_drop = strategy->match_size_drop;
+	if (good_drop < 0)
+		good_drop = 0;
+	else if (good_drop > 100)
+		good_drop = 100;
+
+	/*
+	 * Compute the maximum result size allowed by the strategy, namely the
+	 * input size minus the minimum wanted compression rate.  This had better
+	 * be <= slen, else we might overrun the provided output buffer.
+	 */
+	if (slen > (INT_MAX / 100))
+	{
+		/* Approximate to avoid overflow */
+		result_max = (slen / 100) * (100 - need_rate);
+	}
+	else
+		result_max = (slen * (100 - need_rate)) / 100;
+
+	hashsz = choose_hash_size(hlen);
+	mask = hashsz - 1;
+
+	/*
+	 * Initialize the history lists to empty.  We do not need to zero the
+	 * hist_entries[] array; its entries are initialized as they are used.
+	 */
+	memset(hist_start, 0, hashsz * sizeof(int16));
+
+	pglz_hash_init(hp, hindex, a,b,c,d);
+	while (hp < hend - 4)
+	{
+		/*
+		 * TODO: It would be nice to behave like the history and the source
+		 * strings were concatenated, so that you could compress using the
+		 * new data, too.
+		 */
+		pglz_hash_roll(hp, hindex, a,b,c,d, mask);
+		pglz_hist_add_no_recycle(hist_start, hist_entries,
+								 hist_next,
+								 hp, hend, hindex);
+		hp++;			/* Do not do this ++ in the line above! */
+	}
+
+	/*
+	 * Loop through the input.
+	 */
+	match_off = 0;
+	pglz_hash_init(dp, hindex,a,b,c,d);
+	while (dp < dend - 4)
+	{
+		/*
+		 * If we already exceeded the maximum result size, fail.
+		 *
+		 * We check once per loop; since the loop body could emit as many as 4
+		 * bytes (a control byte and 3-byte tag), PGLZ_MAX_OUTPUT() had better
+		 * allow 4 slop bytes.
+		 */
+		if (bp - bstart >= result_max)
+			return false;
+
+		/*
+		 * Try to find a match in the history
+		 */
+		pglz_hash_roll(dp,hindex,a,b,c,d,mask);
+		if (pglz_find_match(hist_start, dp, dend, &match_len,
+							&match_off, good_match, good_drop, hend, hindex))
+		{
+			/*
+			 * Create the tag and add history entries for all matched
+			 * characters.
+			 */
+			pglz_out_tag(ctrlp, ctrlb, ctrl, bp, match_len, match_off);
+			dp += match_len;
+			found_match = true;
+			pglz_hash_init(dp, hindex,a,b,c,d);
+		}
+		else
+		{
+			/*
+			 * No match found. Copy one literal byte.
+			 */
+			pglz_out_literal(ctrlp, ctrlb, ctrl, bp, *dp);
+			dp++;				/* Do not do this ++ in the line above! */
+			/* The macro would do it four times - Jan.	*/
+		}
+	}
+
+	if (!found_match)
+		return false;
+
+	/* Handle the last few bytes as literals */
+	while (dp < dend)
+	{
+		pglz_out_literal(ctrlp, ctrlb, ctrl, bp, *dp);
+		dp++;				/* Do not do this ++ in the line above! */
+	}
+
+	/*
+	 * Write out the last control byte and check that we haven't overrun the
+	 * output size allowed by the strategy.
+	 */
+	*ctrlp = ctrlb;
+	result_size = bp - bstart;
+
+#ifdef DELTA_DEBUG
+	elog(LOG, "old %d new %d compressed %d", hlen, slen, result_size);
+#endif
+
+	/*
+	 * Success - need only fill in the actual length of the compressed datum.
+	 */
+	*finallen = result_size;
+
+	return true;
+}
 
 /* ----------
  * pglz_decompress -
@@ -740,3 +1026,107 @@ pglz_decompress(const PGLZ_Header *source, char *dest)
 	 * That's it.
 	 */
 }
+
+/* ----------
+ * pglz_delta_decode
+ *
+ *		Decompresses source into dest.
+ *		To decompress, it uses history if provided.
+ * ----------
+ */
+void
+pglz_delta_decode(const char *source, uint32 srclen,
+				  char *dest, uint32 destlen, uint32 *finallen,
+				  const char *history, uint32 histlen)
+{
+	const unsigned char *sp;
+	const unsigned char *srcend;
+	unsigned char *dp;
+	unsigned char *destend;
+	const char *hend;
+
+	sp = ((const unsigned char *) source);
+	srcend = ((const unsigned char *) source) + srclen;
+	dp = (unsigned char *) dest;
+	destend = dp + destlen;
+	hend = history + histlen;
+
+	while (sp < srcend && dp < destend)
+	{
+		/*
+		 * Read one control byte and process the next 8 items (or as many as
+		 * remain in the compressed input).
+		 */
+		unsigned char ctrl = *sp++;
+		int			ctrlc;
+
+		for (ctrlc = 0; ctrlc < 8 && sp < srcend; ctrlc++)
+		{
+			if (ctrl & 1)
+			{
+				/*
+				 * Otherwise it contains the match length minus 3 and the
+				 * upper 4 bits of the offset. The next following byte
+				 * contains the lower 8 bits of the offset. If the length is
+				 * coded as 18, another extension tag byte tells how much
+				 * longer the match really was (0-255).
+				 */
+				int32		len;
+				int32		off;
+
+				len = (sp[0] & 0x0f) + 3;
+				off = ((sp[0] & 0xf0) << 4) | sp[1];
+				sp += 2;
+				if (len == 18)
+					len += *sp++;
+
+				/*
+				 * Check for output buffer overrun, to ensure we don't clobber
+				 * memory in case of corrupt input.  Note: we must advance dp
+				 * here to ensure the error is detected below the loop.  We
+				 * don't simply put the elog inside the loop since that will
+				 * probably interfere with optimization.
+				 */
+				if (dp + len > destend)
+				{
+					dp += len;
+					break;
+				}
+
+				/*
+				 * Now we copy the bytes specified by the tag from history
+				 * to OUTPUT.
+				 */
+				memcpy(dp, hend - off, len);
+				dp += len;
+			}
+			else
+			{
+				/*
+				 * An unset control bit means LITERAL BYTE. So we just
+				 * copy one from INPUT to OUTPUT.
+				 */
+				if (dp >= destend)	/* check for buffer overrun */
+					break;	/* do not clobber memory */
+
+				*dp++ = *sp++;
+			}
+
+			/*
+			 * Advance the control bit
+			 */
+			ctrl >>= 1;
+		}
+	}
+
+	/*
+	 * Check we decompressed the right amount.
+	 */
+	if (sp != srcend)
+		elog(PANIC, "compressed data is corrupt");
+
+	/*
+	 * That's it.
+	 */
+	*finallen = ((char *) dp - dest);
+}
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 22ba35f..6ff6b23 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -124,6 +124,7 @@ extern char *default_tablespace;
 extern char *temp_tablespaces;
 extern bool ignore_checksum_failure;
 extern bool synchronize_seqscans;
+extern int	wal_update_compression_ratio;
 extern int	ssl_renegotiation_limit;
 extern char *SSLCipherSuites;
 
@@ -2410,6 +2411,17 @@ static struct config_int ConfigureNamesInt[] =
 		NULL, NULL, NULL
 	},
 
+	{
+		/* Not for general use */
+		{"wal_update_compression_ratio", PGC_USERSET, DEVELOPER_OPTIONS,
+			gettext_noop("Sets the compression ratio of delta record for wal update"),
+			NULL,
+		},
+		&wal_update_compression_ratio,
+		25, 0, 100,
+		NULL, NULL, NULL
+	},
+
 	/* End-of-list marker */
 	{
 		{NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL
diff --git a/src/include/access/heapam_xlog.h b/src/include/access/heapam_xlog.h
index e58eae5..386277d 100644
--- a/src/include/access/heapam_xlog.h
+++ b/src/include/access/heapam_xlog.h
@@ -147,13 +147,22 @@ typedef struct xl_heap_update
 	TransactionId old_xmax;		/* xmax of the old tuple */
 	TransactionId new_xmax;		/* xmax of the new tuple */
 	ItemPointerData newtid;		/* new inserted tuple id */
-	uint8		old_infobits_set;	/* infomask bits to set on old tuple */
-	bool		all_visible_cleared;	/* PD_ALL_VISIBLE was cleared */
-	bool		new_all_visible_cleared;		/* same for the page of newtid */
+	uint8		old_infobits_set;		/* infomask bits to set on old tuple */
+	uint8		flags;			/* flag bits, see below */
 	/* NEW TUPLE xl_heap_header AND TUPLE DATA FOLLOWS AT END OF STRUCT */
 } xl_heap_update;
 
-#define SizeOfHeapUpdate	(offsetof(xl_heap_update, new_all_visible_cleared) + sizeof(bool))
+#define XL_HEAP_UPDATE_ALL_VISIBLE_CLEARED		0x01	/* Indicates as old
+														 * page's all visible
+														 * bit is cleared */
+#define XL_HEAP_UPDATE_NEW_ALL_VISIBLE_CLEARED	0x02	/* Indicates as new
+														 * page's all visible
+														 * bit is cleared */
+#define XL_HEAP_UPDATE_DELTA_ENCODED			0x04	/* Indicates as the
+														 * update operation is
+														 * delta encoded */
+
+#define SizeOfHeapUpdate	(offsetof(xl_heap_update, flags) + sizeof(uint8))
 
 /*
  * This is what we need to know about vacuum page cleanup/redirect
diff --git a/src/include/access/htup_details.h b/src/include/access/htup_details.h
index cd01ecd..1ef550b 100644
--- a/src/include/access/htup_details.h
+++ b/src/include/access/htup_details.h
@@ -687,6 +687,11 @@ extern HeapTuple heap_modify_tuple(HeapTuple tuple,
 extern void heap_deform_tuple(HeapTuple tuple, TupleDesc tupleDesc,
 				  Datum *values, bool *isnull);
 
+extern bool heap_delta_encode(TupleDesc tupleDesc, HeapTuple oldtup,
+				  HeapTuple newtup, char *encdata, uint32 *enclen);
+extern void heap_delta_decode (char *encdata, uint32 enclen, HeapTuple oldtup,
+				HeapTuple newtup);
+
 /* these three are deprecated versions of the three above: */
 extern HeapTuple heap_formtuple(TupleDesc tupleDescriptor,
 			   Datum *values, char *nulls);
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index f8f06c1..56efcac 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -261,6 +261,7 @@ typedef struct CheckpointStatsData
 extern CheckpointStatsData CheckpointStats;
 
 extern XLogRecPtr XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata);
+extern bool XLogCheckBufferNeedsBackup(Buffer buffer);
 extern void XLogFlush(XLogRecPtr RecPtr);
 extern bool XLogBackgroundFlush(void);
 extern bool XLogNeedsFlush(XLogRecPtr RecPtr);
diff --git a/src/include/utils/pg_lzcompress.h b/src/include/utils/pg_lzcompress.h
index 4af24a3..5add61a 100644
--- a/src/include/utils/pg_lzcompress.h
+++ b/src/include/utils/pg_lzcompress.h
@@ -107,6 +107,12 @@ extern const PGLZ_Strategy *const PGLZ_strategy_always;
  */
 extern bool pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
 			  const PGLZ_Strategy *strategy);
+extern bool pglz_delta_encode(const char *source, int32 slen,
+				  const char *history, int32 hlen,
+				  char *dest, uint32 *finallen, const PGLZ_Strategy *strategy);
 extern void pglz_decompress(const PGLZ_Header *source, char *dest);
+extern void pglz_delta_decode(const char *source, uint32 srclen,
+							  char *dest, uint32 destlen, uint32 *finallen,
+							  const char *history, uint32 histlen);
 
 #endif   /* _PG_LZCOMPRESS_H_ */
diff --git a/src/test/regress/expected/update.out b/src/test/regress/expected/update.out
index 71b856f..af46df2 100644
--- a/src/test/regress/expected/update.out
+++ b/src/test/regress/expected/update.out
@@ -97,3 +97,73 @@ SELECT a, b, char_length(c) FROM update_test;
 (2 rows)
 
 DROP TABLE update_test;
+--
+-- Test to update continuos and non continuos columns
+--
+DROP TABLE IF EXISTS update_test;
+NOTICE:  table "update_test" does not exist, skipping
+CREATE TABLE update_test (
+		bser bigserial,
+		bln boolean,
+		ename VARCHAR(25),
+		perf_f float(8),
+		grade CHAR,
+		dept CHAR(5) NOT NULL,
+		dob DATE,
+		idnum INT,
+		addr VARCHAR(30) NOT NULL,
+		destn CHAR(6),
+		Gend CHAR,
+		samba BIGINT,
+		hgt float,
+		ctime TIME
+);
+INSERT INTO update_test VALUES (
+		nextval('update_test_bser_seq'::regclass),
+		TRUE,
+		'Test',
+		7.169,
+		'B',
+		'CSD',
+		'2000-01-01',
+		520,
+		'road2,
+		streeeeet2,
+		city2',
+		'dcy2',
+		'M',
+		12000,
+		50.4,
+		'00:00:00.0'
+);
+SELECT * from update_test;
+ bser | bln | ename | perf_f | grade | dept  |    dob     | idnum |            addr             | destn  | gend | samba | hgt  |  ctime   
+------+-----+-------+--------+-------+-------+------------+-------+-----------------------------+--------+------+-------+------+----------
+    1 | t   | Test  |  7.169 | B     | CSD   | 01-01-2000 |   520 | road2,                     +| dcy2   | M    | 12000 | 50.4 | 00:00:00
+      |     |       |        |       |       |            |       |                 streeeeet2,+|        |      |       |      | 
+      |     |       |        |       |       |            |       |                 city2       |        |      |       |      | 
+(1 row)
+
+-- update first column
+UPDATE update_test SET bser = bser - 1 + 1;
+-- update middle column
+UPDATE update_test SET perf_f = 8.9;
+-- update last column
+UPDATE update_test SET ctime = '00:00:00.1';
+-- update 3 continuos columns
+UPDATE update_test SET destn = 'dcy2', samba = 0 WHERE Gend = 'M' and dept = 'CSD';
+-- update two non continuos columns
+UPDATE update_test SET destn = 'moved', samba = 0;
+UPDATE update_test SET bln = FALSE, hgt = 10.1;
+-- update causing some column alignment difference
+UPDATE update_test SET ename = 'Tes';
+UPDATE update_test SET dept = 'Test';
+SELECT * from update_test;
+ bser | bln | ename | perf_f | grade | dept  |    dob     | idnum |            addr             | destn  | gend | samba | hgt  |   ctime    
+------+-----+-------+--------+-------+-------+------------+-------+-----------------------------+--------+------+-------+------+------------
+    1 | f   | Tes   |    8.9 | B     | Test  | 01-01-2000 |   520 | road2,                     +| moved  | M    |     0 | 10.1 | 00:00:00.1
+      |     |       |        |       |       |            |       |                 streeeeet2,+|        |      |       |      | 
+      |     |       |        |       |       |            |       |                 city2       |        |      |       |      | 
+(1 row)
+
+DROP TABLE update_test;
diff --git a/src/test/regress/sql/update.sql b/src/test/regress/sql/update.sql
index a8a028f..1806992 100644
--- a/src/test/regress/sql/update.sql
+++ b/src/test/regress/sql/update.sql
@@ -59,3 +59,70 @@ UPDATE update_test SET c = repeat('x', 10000) WHERE c = 'car';
 SELECT a, b, char_length(c) FROM update_test;
 
 DROP TABLE update_test;
+
+
+--
+-- Test to update continuos and non continuos columns
+--
+
+DROP TABLE IF EXISTS update_test;
+CREATE TABLE update_test (
+		bser bigserial,
+		bln boolean,
+		ename VARCHAR(25),
+		perf_f float(8),
+		grade CHAR,
+		dept CHAR(5) NOT NULL,
+		dob DATE,
+		idnum INT,
+		addr VARCHAR(30) NOT NULL,
+		destn CHAR(6),
+		Gend CHAR,
+		samba BIGINT,
+		hgt float,
+		ctime TIME
+);
+
+INSERT INTO update_test VALUES (
+		nextval('update_test_bser_seq'::regclass),
+		TRUE,
+		'Test',
+		7.169,
+		'B',
+		'CSD',
+		'2000-01-01',
+		520,
+		'road2,
+		streeeeet2,
+		city2',
+		'dcy2',
+		'M',
+		12000,
+		50.4,
+		'00:00:00.0'
+);
+
+SELECT * from update_test;
+
+-- update first column
+UPDATE update_test SET bser = bser - 1 + 1;
+
+-- update middle column
+UPDATE update_test SET perf_f = 8.9;
+
+-- update last column
+UPDATE update_test SET ctime = '00:00:00.1';
+
+-- update 3 continuos columns
+UPDATE update_test SET destn = 'dcy2', samba = 0 WHERE Gend = 'M' and dept = 'CSD';
+
+-- update two non continuos columns
+UPDATE update_test SET destn = 'moved', samba = 0;
+UPDATE update_test SET bln = FALSE, hgt = 10.1;
+
+-- update causing some column alignment difference
+UPDATE update_test SET ename = 'Tes';
+UPDATE update_test SET dept = 'Test';
+
+SELECT * from update_test;
+DROP TABLE update_test;