From 084a2fe968a3ee9c0bb4f18fa9fbc8a582f38b3c Mon Sep 17 00:00:00 2001 From: Michael Banck Date: Fri, 18 Oct 2019 10:48:22 +0200 Subject: [PATCH] Fix base backup checksum verification for random or zero page headers We currently do not checksum a page if it is considered new by PageIsNew() or if its pd_lsn page header field is larger than the LSN at the beginning of the base backup. However, this means that if the whole page header is zero, PageIsNew() will consider that page new due to the pd_upper field being zero and no subsequent checksum verification is done. Also, a random value in the pd_lsn field will very likely be larger than the LSN at backup start, again resulting in the page not getting checksummed. To fix, factor out the all-zeroes check from PageIsVerified() into a new method PageIsZero() and call that for pages considered new by PageIsNew(). If a page is all zero, consider that a checksum failure. Also check the pd_lsn field against both the backup start pointer and the current pointer from GetInsertRecPtr(). Add two more tests to the pg_basebackup TAP tests to check those errors. Reported-By: Andres Freund Discussion: https://postgr.es/m/20190319200050.ncuxejradurjakdc@alap3.anarazel.de --- src/backend/replication/basebackup.c | 159 +++++++++++++++------------ src/backend/storage/page/bufpage.c | 56 ++++++---- src/bin/pg_basebackup/t/010_pg_basebackup.pl | 30 +++-- src/include/storage/bufpage.h | 1 + 4 files changed, 148 insertions(+), 98 deletions(-) diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c index d0f210de8c..ee026bc1d0 100644 --- a/src/backend/replication/basebackup.c +++ b/src/backend/replication/basebackup.c @@ -1456,87 +1456,28 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf page = buf + BLCKSZ * i; /* - * Only check pages which have not been modified since the - * start of the base backup. Otherwise, they might have been - * written only halfway and the checksum would not be valid. - * However, replaying WAL would reinstate the correct page in - * this case. We also skip completely new pages, since they - * don't have a checksum yet. + * We skip completely new pages after checking they are + * all-zero, since they don't have a checksum yet. */ - if (!PageIsNew(page) && PageGetLSN(page) < startptr) + if (PageIsNew(page)) { - checksum = pg_checksum_page((char *) page, blkno + segmentno * RELSEG_SIZE); - phdr = (PageHeader) page; - if (phdr->pd_checksum != checksum) + if (!PageIsZero(page)) { /* - * Retry the block on the first failure. It's - * possible that we read the first 4K page of the - * block just before postgres updated the entire block - * so it ends up looking torn to us. We only need to - * retry once because the LSN should be updated to - * something we can ignore on the next pass. If the - * error happens again then it is a true validation - * failure. + * pd_upper is zero, but the page is not all zero. We + * cannot run pg_checksum_page() on the page as it + * would throw an assertion failure. Consider this a + * checksum failure. */ - if (block_retry == false) - { - /* Reread the failed block */ - if (fseek(fp, -(cnt - BLCKSZ * i), SEEK_CUR) == -1) - { - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not fseek in file \"%s\": %m", - readfilename))); - } - - if (fread(buf + BLCKSZ * i, 1, BLCKSZ, fp) != BLCKSZ) - { - /* - * If we hit end-of-file, a concurrent - * truncation must have occurred, so break out - * of this loop just as if the initial fread() - * returned 0. We'll drop through to the same - * code that handles that case. (We must fix - * up cnt first, though.) - */ - if (feof(fp)) - { - cnt = BLCKSZ * i; - break; - } - - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not reread block %d of file \"%s\": %m", - blkno, readfilename))); - } - - if (fseek(fp, cnt - BLCKSZ * i - BLCKSZ, SEEK_CUR) == -1) - { - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not fseek in file \"%s\": %m", - readfilename))); - } - - /* Set flag so we know a retry was attempted */ - block_retry = true; - - /* Reset loop to validate the block again */ - i--; - continue; - } checksum_failures++; if (checksum_failures <= 5) ereport(WARNING, (errmsg("checksum verification failed in " - "file \"%s\", block %d: calculated " - "%X but expected %X", - readfilename, blkno, checksum, - phdr->pd_checksum))); + "file \"%s\", block %d: pd_upper " + "is zero but page is not all-zero", + readfilename, blkno))); if (checksum_failures == 5) ereport(WARNING, (errmsg("further checksum verification " @@ -1544,6 +1485,84 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf "be reported", readfilename))); } } + else + { + /* + * Only check pages which have not been modified since the + * start of the base backup. Otherwise, they might have been + * written only halfway and the checksum would not be valid. + * However, replaying WAL would reinstate the correct page in + * this case. If the page LSN is larger than the current insert + * pointer then we assume a bogus LSN due to random page header + * corruption and do verify the checksum. + */ + if (PageGetLSN(page) < startptr || PageGetLSN(page) > GetInsertRecPtr()) + { + checksum = pg_checksum_page((char *) page, blkno + segmentno * RELSEG_SIZE); + phdr = (PageHeader) page; + if (phdr->pd_checksum != checksum) + { + /* + * Retry the block on the first failure. It's + * possible that we read the first 4K page of the + * block just before postgres updated the entire block + * so it ends up looking torn to us. We only need to + * retry once because the LSN should be updated to + * something we can ignore on the next pass. If the + * error happens again then it is a true validation + * failure. + */ + if (block_retry == false) + { + /* Reread the failed block */ + if (fseek(fp, -(cnt - BLCKSZ * i), SEEK_CUR) == -1) + { + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not fseek in file \"%s\": %m", + readfilename))); + } + + if (fread(buf + BLCKSZ * i, 1, BLCKSZ, fp) != BLCKSZ) + { + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not reread block %d of file \"%s\": %m", + blkno, readfilename))); + } + + if (fseek(fp, cnt - BLCKSZ * i - BLCKSZ, SEEK_CUR) == -1) + { + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not fseek in file \"%s\": %m", + readfilename))); + } + + /* Set flag so we know a retry was attempted */ + block_retry = true; + + /* Reset loop to validate the block again */ + i--; + continue; + } + checksum_failures++; + + if (checksum_failures <= 5) + ereport(WARNING, + (errmsg("checksum verification failed in " + "file \"%s\", block %d: calculated " + "%X but expected %X", + readfilename, blkno, checksum, + phdr->pd_checksum))); + if (checksum_failures == 5) + ereport(WARNING, + (errmsg("further checksum verification " + "failures in file \"%s\" will not " + "be reported", readfilename))); + } + } + } block_retry = false; blkno++; } diff --git a/src/backend/storage/page/bufpage.c b/src/backend/storage/page/bufpage.c index 6b49810e37..0b814ab07d 100644 --- a/src/backend/storage/page/bufpage.c +++ b/src/backend/storage/page/bufpage.c @@ -82,11 +82,8 @@ bool PageIsVerified(Page page, BlockNumber blkno) { PageHeader p = (PageHeader) page; - size_t *pagebytes; - int i; bool checksum_failure = false; bool header_sane = false; - bool all_zeroes = false; uint16 checksum = 0; /* @@ -120,25 +117,9 @@ PageIsVerified(Page page, BlockNumber blkno) } /* - * Check all-zeroes case. Luckily BLCKSZ is guaranteed to always be a - * multiple of size_t - and it's much faster to compare memory using the - * native word size. + * Check all-zeroes case */ - StaticAssertStmt(BLCKSZ == (BLCKSZ / sizeof(size_t)) * sizeof(size_t), - "BLCKSZ has to be a multiple of sizeof(size_t)"); - - all_zeroes = true; - pagebytes = (size_t *) page; - for (i = 0; i < (BLCKSZ / sizeof(size_t)); i++) - { - if (pagebytes[i] != 0) - { - all_zeroes = false; - break; - } - } - - if (all_zeroes) + if (PageIsZero(page)) return true; /* @@ -161,6 +142,39 @@ PageIsVerified(Page page, BlockNumber blkno) return false; } +/* + * PageIsZero + * Check that the page consists only of zero bytes. + * + */ +bool +PageIsZero(Page page) +{ + bool all_zeroes = true; + int i; + size_t *pagebytes = (size_t *) page; + + /* + * Luckily BLCKSZ is guaranteed to always be a multiple of size_t - and + * it's much faster to compare memory using the native word size. + */ + StaticAssertStmt(BLCKSZ == (BLCKSZ / sizeof(size_t)) * sizeof(size_t), + "BLCKSZ has to be a multiple of sizeof(size_t)"); + + for (i = 0; i < (BLCKSZ / sizeof(size_t)); i++) + { + if (pagebytes[i] != 0) + { + all_zeroes = false; + break; + } + } + + if (all_zeroes) + return true; + else + return false; +} /* * PageAddItemExtended diff --git a/src/bin/pg_basebackup/t/010_pg_basebackup.pl b/src/bin/pg_basebackup/t/010_pg_basebackup.pl index b7d36b65dd..3d9c758ea2 100644 --- a/src/bin/pg_basebackup/t/010_pg_basebackup.pl +++ b/src/bin/pg_basebackup/t/010_pg_basebackup.pl @@ -6,7 +6,7 @@ use File::Basename qw(basename dirname); use File::Path qw(rmtree); use PostgresNode; use TestLib; -use Test::More tests => 106; +use Test::More tests => 109; program_help_ok('pg_basebackup'); program_version_ok('pg_basebackup'); @@ -495,21 +495,37 @@ my $file_corrupt2 = $node->safe_psql('postgres', my $pageheader_size = 24; my $block_size = $node->safe_psql('postgres', 'SHOW block_size;'); -# induce corruption +# induce corruption in the pageheader by writing random data into it system_or_bail 'pg_ctl', '-D', $pgdata, 'stop'; open $file, '+<', "$pgdata/$file_corrupt1"; -seek($file, $pageheader_size, 0); -syswrite($file, "\0\0\0\0\0\0\0\0\0"); +my $random_data = join '', map { ("a".."z")[rand 26] } 1 .. $pageheader_size; +syswrite($file, $random_data); +close $file; +system_or_bail 'pg_ctl', '-D', $pgdata, 'start'; + +$node->command_checks_all( + [ 'pg_basebackup', '-D', "$tempdir/backup_corrupt1" ], + 1, + [qr{^$}], + [qr/^WARNING.*checksum verification failed/s], + "pg_basebackup reports checksum mismatch for random pageheader data"); +rmtree("$tempdir/backup_corrupt1"); + +# zero out the pageheader completely +open $file, '+<', "$pgdata/$file_corrupt1"; +system_or_bail 'pg_ctl', '-D', $pgdata, 'stop'; +my $zero_data = "\0"x$pageheader_size; +syswrite($file, $zero_data); close $file; system_or_bail 'pg_ctl', '-D', $pgdata, 'start'; $node->command_checks_all( - [ 'pg_basebackup', '-D', "$tempdir/backup_corrupt" ], + [ 'pg_basebackup', '-D', "$tempdir/backup_corrupt1a" ], 1, [qr{^$}], [qr/^WARNING.*checksum verification failed/s], - 'pg_basebackup reports checksum mismatch'); -rmtree("$tempdir/backup_corrupt"); + "pg_basebackup reports checksum mismatch for zeroed pageheader"); +rmtree("$tempdir/backup_corrupt1a"); # induce further corruption in 5 more blocks system_or_bail 'pg_ctl', '-D', $pgdata, 'stop'; diff --git a/src/include/storage/bufpage.h b/src/include/storage/bufpage.h index 4ef6d8ddd4..6cfa98e49a 100644 --- a/src/include/storage/bufpage.h +++ b/src/include/storage/bufpage.h @@ -420,6 +420,7 @@ do { \ extern void PageInit(Page page, Size pageSize, Size specialSize); extern bool PageIsVerified(Page page, BlockNumber blkno); +extern bool PageIsZero(Page page); extern OffsetNumber PageAddItemExtended(Page page, Item item, Size size, OffsetNumber offsetNumber, int flags); extern Page PageGetTempPage(Page page); -- 2.11.0