From 0e27920a7a7eabecdc2eb8d578e45964f5a77b51 Mon Sep 17 00:00:00 2001 From: Hubert Zhang Date: Mon, 17 Feb 2020 09:16:44 +0000 Subject: [PATCH 1/1] Print physical file path when verify checksum failed Move the verify checksum logic into md.c to avoid every smgrread() needs to separately verify checksums. This also enables printing physical file path(including segno) when verify checksum failed. It should not handle outside md.c, since undo and redo may have different file segmentation. --- contrib/pg_prewarm/pg_prewarm.c | 2 +- src/backend/catalog/storage.c | 11 +---------- src/backend/storage/buffer/bufmgr.c | 28 ++++++++-------------------- src/backend/storage/page/bufpage.c | 33 ++++++++++++++++++++++++++++++--- src/backend/storage/smgr/md.c | 23 ++++++++++++++++++++--- src/backend/storage/smgr/smgr.c | 6 +++--- src/include/storage/bufpage.h | 6 +++--- src/include/storage/md.h | 2 +- src/include/storage/smgr.h | 2 +- src/include/storage/sync.h | 1 + 10 files changed, 69 insertions(+), 45 deletions(-) diff --git a/contrib/pg_prewarm/pg_prewarm.c b/contrib/pg_prewarm/pg_prewarm.c index 33e2d28b27..93059ef581 100644 --- a/contrib/pg_prewarm/pg_prewarm.c +++ b/contrib/pg_prewarm/pg_prewarm.c @@ -178,7 +178,7 @@ pg_prewarm(PG_FUNCTION_ARGS) for (block = first_block; block <= last_block; ++block) { CHECK_FOR_INTERRUPTS(); - smgrread(rel->rd_smgr, forkNumber, block, blockbuffer.data); + smgrread(rel->rd_smgr, forkNumber, block, blockbuffer.data, false); ++blocks_done; } } diff --git a/src/backend/catalog/storage.c b/src/backend/catalog/storage.c index fddfbf1d8c..8cb482cf1e 100644 --- a/src/backend/catalog/storage.c +++ b/src/backend/catalog/storage.c @@ -367,16 +367,7 @@ RelationCopyStorage(SMgrRelation src, SMgrRelation dst, /* If we got a cancel signal during the copy of the data, quit */ CHECK_FOR_INTERRUPTS(); - smgrread(src, forkNum, blkno, buf.data); - - if (!PageIsVerified(page, blkno)) - ereport(ERROR, - (errcode(ERRCODE_DATA_CORRUPTED), - errmsg("invalid page in block %u of relation %s", - blkno, - relpathbackend(src->smgr_rnode.node, - src->smgr_rnode.backend, - forkNum)))); + smgrread(src, forkNum, blkno, buf.data, false); /* * WAL-log the copied page. Unfortunately we don't know what kind of a diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c index 5880054245..12bbf9899c 100644 --- a/src/backend/storage/buffer/bufmgr.c +++ b/src/backend/storage/buffer/bufmgr.c @@ -891,11 +891,18 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, { instr_time io_start, io_time; + bool zeroDamagePage = false; if (track_io_timing) INSTR_TIME_SET_CURRENT(io_start); - smgrread(smgr, forkNum, blockNum, (char *) bufBlock); + /* + * Set whether zero damaged page in checksum veirfy + */ + if (mode == RBM_ZERO_ON_ERROR || zero_damaged_pages) + zeroDamagePage = true; + + smgrread(smgr, forkNum, blockNum, (char *) bufBlock, zeroDamagePage); if (track_io_timing) { @@ -905,25 +912,6 @@ ReadBuffer_common(SMgrRelation smgr, char relpersistence, ForkNumber forkNum, INSTR_TIME_ADD(pgBufferUsage.blk_read_time, io_time); } - /* check for garbage data */ - if (!PageIsVerified((Page) bufBlock, blockNum)) - { - if (mode == RBM_ZERO_ON_ERROR || zero_damaged_pages) - { - ereport(WARNING, - (errcode(ERRCODE_DATA_CORRUPTED), - errmsg("invalid page in block %u of relation %s; zeroing out page", - blockNum, - relpath(smgr->smgr_rnode, forkNum)))); - MemSet((char *) bufBlock, 0, BLCKSZ); - } - else - ereport(ERROR, - (errcode(ERRCODE_DATA_CORRUPTED), - errmsg("invalid page in block %u of relation %s", - blockNum, - relpath(smgr->smgr_rnode, forkNum)))); - } } } diff --git a/src/backend/storage/page/bufpage.c b/src/backend/storage/page/bufpage.c index 4ea6ea7a3d..0b1d4fb39c 100644 --- a/src/backend/storage/page/bufpage.c +++ b/src/backend/storage/page/bufpage.c @@ -26,7 +26,6 @@ /* GUC variable */ bool ignore_checksum_failure = false; - /* ---------------------------------------------------------------- * Page support functions * ---------------------------------------------------------------- @@ -61,13 +60,17 @@ PageInit(Page page, Size pageSize, Size specialSize) /* - * PageIsVerified + * VerifyPage * Check that the page header and checksum (if any) appear valid. * * This is called when a page has just been read in from disk. The idea is * to cheaply detect trashed pages before we go nuts following bogus line * pointers, testing invalid transaction identifiers, etc. * + * FileTag contains the segment file information which the page belongs to. + * Note that for large heap table, it may across many physical files. Better + * to print the exact file path when checksum fails. + * * It turns out to be necessary to allow zeroed pages here too. Even though * this routine is *not* called when deliberately adding a page to a relation, * there are scenarios in which a zeroed page might be found in a table. @@ -79,7 +82,7 @@ PageInit(Page page, Size pageSize, Size specialSize) * will clean up such a page and make it usable. */ bool -PageIsVerified(Page page, BlockNumber blkno) +VerifyPage(const FileTag *ftag, Page page, BlockNumber blkno, bool zeroDamagePage) { PageHeader p = (PageHeader) page; size_t *pagebytes; @@ -151,6 +154,30 @@ PageIsVerified(Page page, BlockNumber blkno) return true; } + /* + * Throw out ERROR or WARNING based on whether zero_damaged_pages_in_checksum is set + * + * Note that using GUC zero_damaged_pages is not suffient here, since we should also + * zero damaged page when ReadBufferMode is RBM_ZERO_ON_ERROR. + */ + if (zeroDamagePage) + { + ereport(WARNING, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("invalid page in block %u of relation file %s.%u; zeroing out page", + blkno, + relpathbackend(ftag->rnode, ftag->backend, ftag->forknum), + ftag->segno))); + MemSet((char *) page, 0, BLCKSZ); + } + else + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("invalid page in block %u of relation file %s.%u", + blkno, + relpathbackend(ftag->rnode, ftag->backend, ftag->forknum), + ftag->segno))); + return false; } diff --git a/src/backend/storage/smgr/md.c b/src/backend/storage/smgr/md.c index c5b771c531..242b9f846c 100644 --- a/src/backend/storage/smgr/md.c +++ b/src/backend/storage/smgr/md.c @@ -94,6 +94,17 @@ static MemoryContext MdCxt; /* context for all MdfdVec objects */ memset(&(a), 0, sizeof(FileTag)), \ (a).handler = SYNC_HANDLER_MD, \ (a).rnode = (xx_rnode), \ + (a).backend = InvalidBackendId, \ + (a).forknum = (xx_forknum), \ + (a).segno = (xx_segno) \ +) + +#define INIT_MD_FILETAG_WITH_BACKEND(a,xx_rnode,xx_backend,xx_forknum,xx_segno) \ +( \ + memset(&(a), 0, sizeof(FileTag)), \ + (a).handler = SYNC_HANDLER_MD, \ + (a).rnode = (xx_rnode), \ + (a).backend = (xx_backend), \ (a).forknum = (xx_forknum), \ (a).segno = (xx_segno) \ ) @@ -599,11 +610,12 @@ mdwriteback(SMgrRelation reln, ForkNumber forknum, */ void mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, - char *buffer) + char *buffer, bool zeroDamagePage) { off_t seekpos; int nbytes; MdfdVec *v; + FileTag tag; TRACE_POSTGRESQL_SMGR_MD_READ_START(forknum, blocknum, reln->smgr_rnode.node.spcNode, @@ -639,12 +651,12 @@ mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, /* * Short read: we are at or past EOF, or we read a partial block at * EOF. Normally this is an error; upper levels should never try to - * read a nonexistent block. However, if zero_damaged_pages is ON or + * read a nonexistent block. However, if zeroDamagePage is ON or * we are InRecovery, we should instead return zeroes without * complaining. This allows, for example, the case of trying to * update a block that was later truncated away. */ - if (zero_damaged_pages || InRecovery) + if (zeroDamagePage || InRecovery) MemSet(buffer, 0, BLCKSZ); else ereport(ERROR, @@ -653,6 +665,11 @@ mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, blocknum, FilePathName(v->mdfd_vfd), nbytes, BLCKSZ))); } + + /* verify the read page content satisfy page checksum */ + INIT_MD_FILETAG_WITH_BACKEND(tag, reln->smgr_rnode.node, reln->smgr_rnode.backend, forknum, v->mdfd_segno); + + VerifyPage(&tag, buffer, blocknum, zeroDamagePage); } /* diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c index 360b5bf5bf..a7048d44c4 100644 --- a/src/backend/storage/smgr/smgr.c +++ b/src/backend/storage/smgr/smgr.c @@ -52,7 +52,7 @@ typedef struct f_smgr void (*smgr_prefetch) (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum); void (*smgr_read) (SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, char *buffer); + BlockNumber blocknum, char *buffer, bool zeroDamagePage); void (*smgr_write) (SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool skipFsync); void (*smgr_writeback) (SMgrRelation reln, ForkNumber forknum, @@ -506,9 +506,9 @@ smgrprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum) */ void smgrread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, - char *buffer) + char *buffer, bool zeroDamagePage) { - smgrsw[reln->smgr_which].smgr_read(reln, forknum, blocknum, buffer); + smgrsw[reln->smgr_which].smgr_read(reln, forknum, blocknum, buffer, zeroDamagePage); } /* diff --git a/src/include/storage/bufpage.h b/src/include/storage/bufpage.h index 3f88683a05..0037819b1c 100644 --- a/src/include/storage/bufpage.h +++ b/src/include/storage/bufpage.h @@ -18,6 +18,7 @@ #include "storage/block.h" #include "storage/item.h" #include "storage/off.h" +#include "storage/sync.h" /* For FileTag */ /* * A postgres disk page is an abstraction layered on top of a postgres @@ -419,7 +420,7 @@ do { \ ((is_heap) ? PAI_IS_HEAP : 0)) /* - * Check that BLCKSZ is a multiple of sizeof(size_t). In PageIsVerified(), + * Check that BLCKSZ is a multiple of sizeof(size_t). In VerifyPage(), * it is much faster to check if a page is full of zeroes using the native * word size. Note that this assertion is kept within a header to make * sure that StaticAssertDecl() works across various combinations of @@ -429,7 +430,7 @@ StaticAssertDecl(BLCKSZ == ((BLCKSZ / sizeof(size_t)) * sizeof(size_t)), "BLCKSZ has to be a multiple of sizeof(size_t)"); extern void PageInit(Page page, Size pageSize, Size specialSize); -extern bool PageIsVerified(Page page, BlockNumber blkno); +extern bool VerifyPage(const FileTag *ftag, Page page, BlockNumber blkno, bool zeroDamagePage); extern OffsetNumber PageAddItemExtended(Page page, Item item, Size size, OffsetNumber offsetNumber, int flags); extern Page PageGetTempPage(Page page); @@ -448,5 +449,4 @@ extern bool PageIndexTupleOverwrite(Page page, OffsetNumber offnum, Item newtup, Size newsize); extern char *PageSetChecksumCopy(Page page, BlockNumber blkno); extern void PageSetChecksumInplace(Page page, BlockNumber blkno); - #endif /* BUFPAGE_H */ diff --git a/src/include/storage/md.h b/src/include/storage/md.h index ec7630ce3b..7537c4f490 100644 --- a/src/include/storage/md.h +++ b/src/include/storage/md.h @@ -31,7 +31,7 @@ extern void mdextend(SMgrRelation reln, ForkNumber forknum, extern void mdprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum); extern void mdread(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, - char *buffer); + char *buffer, bool zeroDamagePage); extern void mdwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool skipFsync); extern void mdwriteback(SMgrRelation reln, ForkNumber forknum, diff --git a/src/include/storage/smgr.h b/src/include/storage/smgr.h index 243822137c..e8eda56f14 100644 --- a/src/include/storage/smgr.h +++ b/src/include/storage/smgr.h @@ -95,7 +95,7 @@ extern void smgrextend(SMgrRelation reln, ForkNumber forknum, extern void smgrprefetch(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum); extern void smgrread(SMgrRelation reln, ForkNumber forknum, - BlockNumber blocknum, char *buffer); + BlockNumber blocknum, char *buffer, bool zeroDamagePage); extern void smgrwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum, char *buffer, bool skipFsync); extern void smgrwriteback(SMgrRelation reln, ForkNumber forknum, diff --git a/src/include/storage/sync.h b/src/include/storage/sync.h index e16ab8e711..2c34579eec 100644 --- a/src/include/storage/sync.h +++ b/src/include/storage/sync.h @@ -47,6 +47,7 @@ typedef struct FileTag int16 handler; /* SyncRequestHandler value, saving space */ int16 forknum; /* ForkNumber, saving space */ RelFileNode rnode; + BackendId backend; uint32 segno; } FileTag; -- 2.16.6