From 084a2fe968a3ee9c0bb4f18fa9fbc8a582f38b3c Mon Sep 17 00:00:00 2001 From: Michael Banck Date: Thu, 12 Mar 2020 07:31:22 +0100 Subject: [PATCH] Fix checksum verification in base backups for random or zero page headers We currently do not checksum a page if it is considered new by PageIsNew() or if its pd_lsn page header field is larger than the LSN at the beginning of the base backup. However, this means that if the whole page header is zero, PageIsNew() will consider that page new due to the pd_upper field being zero and no subsequent checksum verification is done. Also, a random value in the pd_lsn field will very likely be larger than the LSN at backup start, again resulting in the page not getting checksummed. To fix, factor out the all-zeroes check from PageIsVerified() into a new method PageIsZero() and call that for pages considered new by PageIsNew(). If such a page is not all zero, consider that a checksum failure. Also check the pd_lsn field against both the backup start pointer and the current pointer from GetInsertRecPtr(). Add two more tests to the pg_basebackup TAP tests to check those errors. Reported-By: Andres Freund Reviewed-By: Asif Rehman Discussion: https://postgr.es/m/20190319200050.ncuxejradurjakdc@alap3.anarazel.de diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c index 806d013108..6e4a5a1365 100644 --- a/src/backend/replication/basebackup.c +++ b/src/backend/replication/basebackup.c @@ -1526,87 +1526,27 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf page = buf + BLCKSZ * i; /* - * Only check pages which have not been modified since the - * start of the base backup. Otherwise, they might have been - * written only halfway and the checksum would not be valid. - * However, replaying WAL would reinstate the correct page in - * this case. We also skip completely new pages, since they - * don't have a checksum yet. 
+ * We skip completely new pages after checking they are + * all-zero, since they don't have a checksum yet. */ - if (!PageIsNew(page) && PageGetLSN(page) < startptr) + if (PageIsNew(page)) { - checksum = pg_checksum_page((char *) page, blkno + segmentno * RELSEG_SIZE); - phdr = (PageHeader) page; - if (phdr->pd_checksum != checksum) + if (!PageIsZero(page)) { /* - * Retry the block on the first failure. It's - * possible that we read the first 4K page of the - * block just before postgres updated the entire block - * so it ends up looking torn to us. We only need to - * retry once because the LSN should be updated to - * something we can ignore on the next pass. If the - * error happens again then it is a true validation - * failure. + * pd_upper is zero, but the page is not all zero. We + * cannot run pg_checksum_page() on the page as it + * would throw an assertion failure. Consider this a + * checksum failure. */ - if (block_retry == false) - { - /* Reread the failed block */ - if (fseek(fp, -(cnt - BLCKSZ * i), SEEK_CUR) == -1) - { - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not fseek in file \"%s\": %m", - readfilename))); - } - - if (fread(buf + BLCKSZ * i, 1, BLCKSZ, fp) != BLCKSZ) - { - /* - * If we hit end-of-file, a concurrent - * truncation must have occurred, so break out - * of this loop just as if the initial fread() - * returned 0. We'll drop through to the same - * code that handles that case. (We must fix - * up cnt first, though.) 
- */ - if (feof(fp)) - { - cnt = BLCKSZ * i; - break; - } - - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not reread block %d of file \"%s\": %m", - blkno, readfilename))); - } - - if (fseek(fp, cnt - BLCKSZ * i - BLCKSZ, SEEK_CUR) == -1) - { - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not fseek in file \"%s\": %m", - readfilename))); - } - - /* Set flag so we know a retry was attempted */ - block_retry = true; - - /* Reset loop to validate the block again */ - i--; - continue; - } - checksum_failures++; if (checksum_failures <= 5) ereport(WARNING, (errmsg("checksum verification failed in " - "file \"%s\", block %d: calculated " - "%X but expected %X", - readfilename, blkno, checksum, - phdr->pd_checksum))); + "file \"%s\", block %d: pd_upper " + "is zero but page is not all-zero", + readfilename, blkno))); if (checksum_failures == 5) ereport(WARNING, (errmsg("further checksum verification " @@ -1614,6 +1554,85 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf "be reported", readfilename))); } } + else + { + /* + * Only check pages which have not been modified since the + * start of the base backup. Otherwise, they might have + * been written only halfway and the checksum would not be + * valid. However, replaying WAL would reinstate the + * correct page in this case. If the page LSN is larger + * than the current insert pointer then we assume a bogus + * LSN due to random page header corruption and do verify + * the checksum. + */ + if (PageGetLSN(page) < startptr || PageGetLSN(page) > GetInsertRecPtr()) + { + checksum = pg_checksum_page((char *) page, blkno + segmentno * RELSEG_SIZE); + phdr = (PageHeader) page; + if (phdr->pd_checksum != checksum) + { + /* + * Retry the block on the first failure. It's + * possible that we read the first 4K page of the + * block just before postgres updated the entire + * block so it ends up looking torn to us. 
We + * only need to retry once because the LSN should + * be updated to something we can ignore on the + * next pass. If the error happens again then it + * is a true validation failure. + */ + if (block_retry == false) + { + /* Reread the failed block */ + if (fseek(fp, -(cnt - BLCKSZ * i), SEEK_CUR) == -1) + { + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not fseek in file \"%s\": %m", + readfilename))); + } + + if (fread(buf + BLCKSZ * i, 1, BLCKSZ, fp) != BLCKSZ) + { + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not reread block %d of file \"%s\": %m", + blkno, readfilename))); + } + + if (fseek(fp, cnt - BLCKSZ * i - BLCKSZ, SEEK_CUR) == -1) + { + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not fseek in file \"%s\": %m", + readfilename))); + } + + /* Set flag so we know a retry was attempted */ + block_retry = true; + + /* Reset loop to validate the block again */ + i--; + continue; + } + checksum_failures++; + + if (checksum_failures <= 5) + ereport(WARNING, + (errmsg("checksum verification failed in " + "file \"%s\", block %d: calculated " + "%X but expected %X", + readfilename, blkno, checksum, + phdr->pd_checksum))); + if (checksum_failures == 5) + ereport(WARNING, + (errmsg("further checksum verification " + "failures in file \"%s\" will not " + "be reported", readfilename))); + } + } + } block_retry = false; blkno++; } diff --git a/src/backend/storage/page/bufpage.c b/src/backend/storage/page/bufpage.c index cb7b8c8a63..a46bf537e4 100644 --- a/src/backend/storage/page/bufpage.c +++ b/src/backend/storage/page/bufpage.c @@ -82,11 +82,8 @@ bool PageIsVerified(Page page, BlockNumber blkno) { PageHeader p = (PageHeader) page; - size_t *pagebytes; - int i; bool checksum_failure = false; bool header_sane = false; - bool all_zeroes = false; uint16 checksum = 0; /* @@ -120,18 +117,7 @@ PageIsVerified(Page page, BlockNumber blkno) } /* Check all-zeroes case */ - all_zeroes = true; - pagebytes = (size_t *) page; - 
for (i = 0; i < (BLCKSZ / sizeof(size_t)); i++) - { - if (pagebytes[i] != 0) - { - all_zeroes = false; - break; - } - } - - if (all_zeroes) + if (PageIsZero(page)) return true; /* @@ -154,6 +140,25 @@ PageIsVerified(Page page, BlockNumber blkno) return false; } +/* + * PageIsZero + * Check that the page consists only of zero bytes. + * + */ +bool +PageIsZero(Page page) +{ + int i; + size_t *pagebytes = (size_t *) page; + + for (i = 0; i < (BLCKSZ / sizeof(size_t)); i++) + { + if (pagebytes[i] != 0) + return false; + } + + return true; +} /* * PageAddItemExtended diff --git a/src/bin/pg_basebackup/t/010_pg_basebackup.pl b/src/bin/pg_basebackup/t/010_pg_basebackup.pl index 3c70499feb..6db758f115 100644 --- a/src/bin/pg_basebackup/t/010_pg_basebackup.pl +++ b/src/bin/pg_basebackup/t/010_pg_basebackup.pl @@ -6,7 +6,7 @@ use File::Basename qw(basename dirname); use File::Path qw(rmtree); use PostgresNode; use TestLib; -use Test::More tests => 107; +use Test::More tests => 110; program_help_ok('pg_basebackup'); program_version_ok('pg_basebackup'); @@ -495,21 +495,37 @@ my $file_corrupt2 = $node->safe_psql('postgres', my $pageheader_size = 24; my $block_size = $node->safe_psql('postgres', 'SHOW block_size;'); -# induce corruption +# induce corruption in the pageheader by writing random data into it system_or_bail 'pg_ctl', '-D', $pgdata, 'stop'; open $file, '+<', "$pgdata/$file_corrupt1"; -seek($file, $pageheader_size, 0); -syswrite($file, "\0\0\0\0\0\0\0\0\0"); +my $random_data = join '', map { ("a".."z")[rand 26] } 1 .. 
$pageheader_size; +syswrite($file, $random_data); +close $file; +system_or_bail 'pg_ctl', '-D', $pgdata, 'start'; + +$node->command_checks_all( + [ 'pg_basebackup', '-D', "$tempdir/backup_corrupt1" ], + 1, + [qr{^$}], + [qr/^WARNING.*checksum verification failed/s], + "pg_basebackup reports checksum mismatch for random pageheader data"); +rmtree("$tempdir/backup_corrupt1"); + +# zero out the pageheader completely +open $file, '+<', "$pgdata/$file_corrupt1"; +system_or_bail 'pg_ctl', '-D', $pgdata, 'stop'; +my $zero_data = "\0"x$pageheader_size; +syswrite($file, $zero_data); close $file; system_or_bail 'pg_ctl', '-D', $pgdata, 'start'; $node->command_checks_all( - [ 'pg_basebackup', '-D', "$tempdir/backup_corrupt" ], + [ 'pg_basebackup', '-D', "$tempdir/backup_corrupt1a" ], 1, [qr{^$}], [qr/^WARNING.*checksum verification failed/s], - 'pg_basebackup reports checksum mismatch'); -rmtree("$tempdir/backup_corrupt"); + "pg_basebackup reports checksum mismatch for zeroed pageheader"); +rmtree("$tempdir/backup_corrupt1a"); # induce further corruption in 5 more blocks system_or_bail 'pg_ctl', '-D', $pgdata, 'stop'; diff --git a/src/include/storage/bufpage.h b/src/include/storage/bufpage.h index 3f88683a05..a1fcb21cda 100644 --- a/src/include/storage/bufpage.h +++ b/src/include/storage/bufpage.h @@ -419,17 +419,18 @@ do { \ ((is_heap) ? PAI_IS_HEAP : 0)) /* - * Check that BLCKSZ is a multiple of sizeof(size_t). In PageIsVerified(), - * it is much faster to check if a page is full of zeroes using the native - * word size. Note that this assertion is kept within a header to make - * sure that StaticAssertDecl() works across various combinations of - * platforms and compilers. + * Check that BLCKSZ is a multiple of sizeof(size_t). In PageIsZero(), it is + * much faster to check if a page is full of zeroes using the native word size. 
+ * Note that this assertion is kept within a header to make sure that + * StaticAssertDecl() works across various combinations of platforms and + * compilers. */ StaticAssertDecl(BLCKSZ == ((BLCKSZ / sizeof(size_t)) * sizeof(size_t)), "BLCKSZ has to be a multiple of sizeof(size_t)"); extern void PageInit(Page page, Size pageSize, Size specialSize); extern bool PageIsVerified(Page page, BlockNumber blkno); +extern bool PageIsZero(Page page); extern OffsetNumber PageAddItemExtended(Page page, Item item, Size size, OffsetNumber offsetNumber, int flags); extern Page PageGetTempPage(Page page);