From 74bff8b4f8c9f9ab34d7dc8887ac27fc7ab56c2b Mon Sep 17 00:00:00 2001 From: Andrey Date: Mon, 24 Jun 2019 13:36:47 +0500 Subject: [PATCH] Use memcpy in pglz decompression Previously pglz used a byte-by-byte loop during decompression. This commit uses memcpy for this purpose. --- src/common/pg_lzcompress.c | 47 +++++++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 13 deletions(-) diff --git a/src/common/pg_lzcompress.c b/src/common/pg_lzcompress.c index 988b3987d0..563144a3ad 100644 --- a/src/common/pg_lzcompress.c +++ b/src/common/pg_lzcompress.c @@ -714,11 +714,13 @@ pglz_decompress(const char *source, int32 slen, char *dest, if (ctrl & 1) { /* - * Otherwise it contains the match length minus 3 and the - * upper 4 bits of the offset. The next following byte - * contains the lower 8 bits of the offset. If the length is - * coded as 18, another extension tag byte tells how much - * longer the match really was (0-255). + * A set control bit means we must read a match tag. + * The match is coded with two bytes. The first byte uses its lower + * nibble to code length - 3. Its higher nibble contains the upper 4 + * bits of the offset. The next following byte contains the + * lower 8 bits of the offset. If the length is coded as 18, + * another extension tag byte tells how much longer the match + * really was (0-255). */ int32 len; int32 off; @@ -731,22 +733,41 @@ pglz_decompress(const char *source, int32 slen, char *dest, /* * Now we copy the bytes specified by the tag from OUTPUT to - * OUTPUT. It is dangerous and platform dependent to use - * memcpy() here, because the copied areas could overlap - * extremely! + * OUTPUT (copy len bytes from dp - off to dp). The copied + * areas could overlap; to prevent possible uncertainty, we copy + * only non-overlapping regions. */ len = Min(len, destend - dp); - while (len--) + while (off < len) { - *dp = dp[-off]; - dp++; + /* + * When the offset is smaller than the length, the source and + * destination regions overlap. 
memmove() would resolve this + * overlap in a way incompatible with pglz. Thus we resort + * to memcpy()-ing non-overlapping regions. + * Consider input: 112341234123412341234 + * At byte 5 here ^ we have a match with length 16 and + * offset 4. 11234M(len=16, off=4) + * We decode the first period of the match and rewrite the match: + * 112341234M(len=12, off=8) + * The same match is now at position 9; it points to the same start + * byte of the output, but from another position: the offset is + * doubled. We iterate through this offset growth until we + * can proceed to the usual memcpy(). + */ + memcpy(dp, dp - off, off); + len -= off; + dp += off; + off += off; } + memcpy(dp, dp - off, len); + dp+=len; } else { /* - * An unset control bit means LITERAL BYTE. So we just copy - * one from INPUT to OUTPUT. + * An unset control bit means LITERAL BYTE. So we just + * copy one from INPUT to OUTPUT. */ *dp++ = *sp++; } -- 2.20.1