From 090dab37f8d3fa5a9b872d938e211bf8d69adab5 Mon Sep 17 00:00:00 2001 From: Julien Rouhaud Date: Thu, 15 Oct 2020 13:20:17 +0800 Subject: [PATCH v14 2/2] Add a pg_check_relation() SQL function This functions checks the validity of the checksums for all non-dirty blocks of a given relation, and optionally a given fork, and returns the list of all blocks that don't match, along with the expected and found checksums.k Author: Julien Rouhaud Reviewed-by: Michael Paquier, Masahiko Sawada, Justin Pryzby Discussion: https://postgr.es/m/CAOBaU_aVvMjQn%3Dge5qPiJOPMmOj5%3Dii3st5Q0Y%2BWuLML5sR17w%40mail.gmail.com --- doc/src/sgml/func.sgml | 51 ++++ src/backend/utils/adt/Makefile | 1 + src/include/catalog/pg_proc.dat | 16 + src/test/modules/Makefile | 1 + src/test/modules/check_relation/.gitignore | 2 + src/test/modules/check_relation/Makefile | 14 + src/test/modules/check_relation/README | 23 ++ .../check_relation/t/001_checksums_check.pl | 276 ++++++++++++++++++ src/tools/msvc/Mkvcbuild.pm | 3 +- 9 files changed, 386 insertions(+), 1 deletion(-) create mode 100644 src/test/modules/check_relation/.gitignore create mode 100644 src/test/modules/check_relation/Makefile create mode 100644 src/test/modules/check_relation/README create mode 100644 src/test/modules/check_relation/t/001_checksums_check.pl diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index e7cff980dd..28663107c8 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -26217,6 +26217,57 @@ SELECT convert_from(pg_read_binary_file('file_in_utf8.txt'), 'UTF8'); + + Data Sanity Functions + + + The functions shown in + provide a means to check for health of a data file in a cluster. + + + + Data Sanity Functions + + + Name Return Type Description + + + + + + + pg_check_relation(relation regclass [, fork text]) + + setof record + Validate the checksum for all blocks of a relation. + + + + +
+ + + pg_check_relation + + + pg_check_relation iterates over all blocks of a + given relation and verifies their checksums. If passed, + fork specifies that only checksums of the given + fork are to be verified. Fork should be 'main' for the + main data fork, 'fsm' for the free space map, + 'vm' for the visibility map, or + 'init' for the initialization fork. + The function returns a list of blocks for which the computed and stored + checksums don't match. See for + information on how to configure cost-based verification delay. You must be + a member of the pg_read_all_stats role to use this + function. It can only be used if data checksums are enabled. See for more information. + + +
+ diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile index b4d55e849b..603f63afb6 100644 --- a/src/backend/utils/adt/Makefile +++ b/src/backend/utils/adt/Makefile @@ -22,6 +22,7 @@ OBJS = \ bool.o \ cash.o \ char.o \ + checksumfuncs.o \ cryptohashes.o \ date.o \ datetime.o \ diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 22340baf1c..9f4514d60f 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -10958,6 +10958,22 @@ proallargtypes => '{oid,text,int8,timestamptz}', proargmodes => '{i,o,o,o}', proargnames => '{tablespace,name,size,modification}', prosrc => 'pg_ls_tmpdir_1arg' }, +{ oid => '9147', descr => 'check data integrity for all forks of a relation', + proname => 'pg_check_relation', procost => '10000', + prorows => '20', proretset => 't', proparallel => 'r', + provolatile => 'v', prorettype => 'record', proargtypes => 'regclass', + proallargtypes => '{regclass,oid,int4,int8,int4,int4}', + proargmodes => '{i,o,o,o,o,o}', + proargnames => '{relation,relid,forknum,failed_blocknum,expected_checksum,found_checksum}', + prosrc => 'pg_check_relation' }, +{ oid => '9148', descr => 'check data integrity for one fork of a relation', + proname => 'pg_check_relation', procost => '10000', + prorows => '20', proretset => 't', proparallel => 'r', + provolatile => 'v', prorettype => 'record', proargtypes => 'regclass text', + proallargtypes => '{regclass,text,oid,int4,int8,int4,int4}', + proargmodes => '{i,i,o,o,o,o,o}', + proargnames => '{relation,fork,relid,forknum,failed_blocknum,expected_checksum,found_checksum}', + prosrc => 'pg_check_relation_fork' }, # hash partitioning constraint function { oid => '5028', descr => 'hash partition CHECK constraint', diff --git a/src/test/modules/Makefile b/src/test/modules/Makefile index a6d2ffbf9e..a845af71fd 100644 --- a/src/test/modules/Makefile +++ b/src/test/modules/Makefile @@ -7,6 +7,7 @@ include $(top_builddir)/src/Makefile.global 
SUBDIRS = \ brin \ commit_ts \ + check_relation \ delay_execution \ dummy_index_am \ dummy_seclabel \ diff --git a/src/test/modules/check_relation/.gitignore b/src/test/modules/check_relation/.gitignore new file mode 100644 index 0000000000..871e943d50 --- /dev/null +++ b/src/test/modules/check_relation/.gitignore @@ -0,0 +1,2 @@ +# Generated by test suite +/tmp_check/ diff --git a/src/test/modules/check_relation/Makefile b/src/test/modules/check_relation/Makefile new file mode 100644 index 0000000000..a540cdece2 --- /dev/null +++ b/src/test/modules/check_relation/Makefile @@ -0,0 +1,14 @@ +# src/test/modules/check_relation/Makefile + +TAP_TESTS = 1 + +ifdef USE_PGXS +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +subdir = src/test/modules/check_relation +top_builddir = ../../../.. +include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif diff --git a/src/test/modules/check_relation/README b/src/test/modules/check_relation/README new file mode 100644 index 0000000000..415c4b21ad --- /dev/null +++ b/src/test/modules/check_relation/README @@ -0,0 +1,23 @@ +src/test/modules/check_relation/README + +Regression tests for online checksums verification +================================================== + +This directory contains a test suite for online checksums verification. + +Running the tests +================= + +NOTE: You must have given the --enable-tap-tests argument to configure. + +Run + make check +or + make installcheck +You can use "make installcheck" if you previously did "make install". +In that case, the code in the installation tree is tested. With +"make check", a temporary installation tree is built from the current +sources and then tested. + +Either way, this test initializes, starts, and stops a test Postgres +cluster.
diff --git a/src/test/modules/check_relation/t/001_checksums_check.pl b/src/test/modules/check_relation/t/001_checksums_check.pl
new file mode 100644
index 0000000000..2a3f2880ea
--- /dev/null
+++ b/src/test/modules/check_relation/t/001_checksums_check.pl
@@ -0,0 +1,316 @@
+use strict;
+use warnings;
+
+use PostgresNode;
+use TestLib;
+use Test::More tests => 59;
+
+# Offsets, in uint16 units, of the checksum and pd_upper fields in a page.
+our $CHECKSUM_UINT16_OFFSET = 4;
+our $PD_UPPER_UINT16_OFFSET = 7;
+# Block size of the cluster, read from the block_size setting below.
+our $BLOCKSIZE;
+# Number of checksum failures provoked so far by this test.
+our $TOTAL_NB_ERR = 0;
+
+# Return the raw content of block $blkno of the file $filename.
+sub get_block
+{
+	my ($filename, $blkno) = @_;
+	my $block;
+
+	open(my $infile, '<', $filename)
+	  or die("could not open \"$filename\" for reading: $!");
+	binmode($infile);
+
+	# The 4th argument of read() is an offset into the target buffer, not
+	# into the file, so the handle has to be positioned explicitly.
+	seek($infile, $blkno * $BLOCKSIZE, 0)
+	  or die("could not seek to block $blkno of \"$filename\": $!");
+
+	my $success = read($infile, $block, $BLOCKSIZE);
+	die("could not read block $blkno of \"$filename\": $!")
+	  if not defined $success;
+	die("short read of block $blkno of \"$filename\"")
+	  if ($success != $BLOCKSIZE);
+
+	close($infile);
+
+	return ($block);
+}
+
+# Overwrite block $blkno of the file $filename with $block, leaving the other
+# blocks of the file untouched.
+sub overwrite_block
+{
+	my ($filename, $block, $blkno) = @_;
+
+	# Open for update: '>' would truncate the file, destroying every block
+	# other than the one being written.
+	open(my $outfile, '+<', $filename)
+	  or die("could not open \"$filename\" for writing: $!");
+	binmode($outfile);
+
+	# The 4th argument of syswrite() is an offset into the source scalar,
+	# not into the file, so position the handle with sysseek() first.
+	sysseek($outfile, $blkno * $BLOCKSIZE, 0)
+	  or die("could not seek to block $blkno of \"$filename\": $!");
+
+	my $nb = syswrite($outfile, $block, $BLOCKSIZE);
+
+	die("could not write block $blkno of \"$filename\": $!")
+	  if not defined $nb;
+	die("Write error") if ($nb != $BLOCKSIZE);
+
+	# syswrite() is unbuffered, so there is nothing to flush before closing.
+	close($outfile)
+	  or die("could not close \"$filename\": $!");
+}
+
+# Return the uint16 stored at the given uint16 offset of the block, using the
+# native byte order.
+sub get_uint16_from_page
+{
+	my ($block, $offset) = @_;
+
+	return (unpack("S*", $block))[$offset];
+}
+
+# Return a copy of the block where the uint16 at the given uint16 offset has
+# been replaced with $data, using the native byte order.
+sub set_uint16_to_page
+{
+	my ($block, $data, $offset) = @_;
+
+	my $pack = pack("S", $data);
+
+	# vec with 16 bits or more won't preserve endianness, so copy the packed
+	# value one byte at a time
+	vec($block, 2 * $offset, 8) = (unpack('C*', $pack))[0];
+	vec($block, (2 * $offset) + 1, 8) = (unpack('C*', $pack))[1];
+
+	return $block;
+}
+
+# Return true if pg_check_relation() can be run on the given relation without
+# anything being reported on stderr.
+sub check_checksums_call
+{
+	my ($node, $relname) = @_;
+
+	my ($cmdret, $stdout, $stderr) = $node->psql('postgres', "SELECT COUNT(*)"
+		. " FROM pg_catalog.pg_check_relation('$relname')"
+	);
+
+	return ($stderr eq '');
+}
+
+# Check the main fork of every table, index and materialized view, and verify
+# that $nb corrupted blocks are reported and that stderr matches $pattern.
+sub check_checksums_nb_error
+{
+	my ($node, $nb, $pattern) = @_;
+
+	my ($cmdret, $stdout, $stderr) = $node->psql('postgres', "SELECT COUNT(*)"
+		. " FROM (SELECT pg_catalog.pg_check_relation(oid, 'main')"
+		. " FROM pg_class WHERE relkind in ('r', 'i', 'm')) AS s"
+	);
+
+	is($cmdret, 0, 'Function should run successfully');
+	like($stderr, $pattern, 'Error output should match expectations');
+	is($stdout, $nb, "Should have $nb error");
+
+	$TOTAL_NB_ERR += $nb;
+}
+
+# Verify that the checksum failures cumulated in pg_stat_database match the
+# number of failures provoked so far.
+sub check_pg_stat_database_nb_error
+{
+	my ($node) = @_;
+
+	my ($cmdret, $stdout, $stderr) = $node->psql('postgres', "SELECT "
+		. " sum(checksum_failures)"
+		. " FROM pg_catalog.pg_stat_database"
+	);
+
+	is($cmdret, 0, 'Function should run successfully');
+	is($stderr, '', 'Function should not output any error');
+	is($stdout, $TOTAL_NB_ERR, "Should have $TOTAL_NB_ERR error");
+}
+
+# Check all forks of every table, index and materialized view, verify that
+# stderr matches $pattern, and return the details of the failures found.  $nb
+# is the number of failures the caller expects to be reported.
+sub get_checksums_errors
+{
+	my ($node, $nb, $pattern) = @_;
+
+	my ($cmdret, $stdout, $stderr) = $node->psql('postgres', "SELECT"
+		. " relid::regclass::text, forknum, failed_blocknum,"
+		. " expected_checksum, found_checksum"
+		. " FROM (SELECT (pg_catalog.pg_check_relation(oid)).*"
+		. " FROM pg_class WHERE relkind in ('r','i', 'm')) AS s"
+	);
+
+	is($cmdret, '0', 'Function should run successfully');
+	like($stderr, $pattern, 'Error output should match expectations');
+
+	$TOTAL_NB_ERR += $nb;
+
+	return $stdout;
+}
+
+# This function will perform various test by modifying the specified block at
+# the specified uint16 offset, checking that the corruption is correctly
+# detected, and finally restore the specified block to its original content.
+sub corrupt_and_test_block
+{
+	my ($node, $filename, $blkno, $offset, $fake_data) = @_;
+
+	check_checksums_nb_error($node, 0, qr/^$/);
+
+	check_pg_stat_database_nb_error($node);
+
+	$node->stop();
+
+	my $original_block = get_block($filename, $blkno);
+	my $original_data = get_uint16_from_page($original_block, $offset);
+
+	isnt($original_data, $fake_data,
+		"The fake data at offset $offset should be different"
+		. " from the existing one");
+
+	my $new_block = set_uint16_to_page($original_block, $fake_data, $offset);
+	isnt($original_data, get_uint16_from_page($new_block, $offset),
+		"The fake data at offset $offset should have been changed in memory");
+
+	overwrite_block($filename, $new_block, $blkno);
+
+	my $written_data =
+	  get_uint16_from_page(get_block($filename, $blkno), $offset);
+	isnt($original_data, $written_data,
+		"The data written at offset $offset should be different"
+		. " from the original one");
+	is(get_uint16_from_page($new_block, $offset), $written_data,
+		"The data written at offset $offset should be the same"
+		. " as the one in memory");
+	is($written_data, $fake_data,
+		"The data written at offset $offset should be the one"
+		. " we wanted to write");
+
+	$node->start();
+
+	check_checksums_nb_error($node, 1, qr/invalid page in block $blkno/);
+
+	my $expected_checksum;
+	my $found_checksum = get_uint16_from_page($new_block,
+		$CHECKSUM_UINT16_OFFSET);
+	if ($offset == $PD_UPPER_UINT16_OFFSET)
+	{
+		# A checksum can't be computed if it's detected as PageIsNew(), so the
+		# function returns NULL for the computed checksum
+		$expected_checksum = '';
+	}
+	else
+	{
+		$expected_checksum = get_uint16_from_page($original_block,
+			$CHECKSUM_UINT16_OFFSET);
+	}
+
+	my $det = get_checksums_errors($node, 1, qr/invalid page in block $blkno/);
+	is($det, "t1|0|$blkno|$expected_checksum|$found_checksum",
+		"The checksums error for modification at offset $offset"
+		. " should be detected");
+
+	$node->stop();
+
+	$new_block = set_uint16_to_page($original_block, $original_data, $offset);
+	is($original_data, get_uint16_from_page($new_block, $offset),
+		"The data at offset $offset should have been restored in memory");
+
+	overwrite_block($filename, $new_block, $blkno);
+	is($original_data, get_uint16_from_page(get_block($filename, $blkno),
+			$offset),
+		"The data at offset $offset should have been restored on disk");
+
+	$node->start();
+
+	check_checksums_nb_error($node, 0, qr/^$/);
+}
+
+if (exists $ENV{MY_PG_REGRESS})
+{
+	$ENV{PG_REGRESS} = $ENV{MY_PG_REGRESS};
+}
+
+my $node = get_new_node('main');
+
+my %params;
+$params{'extra'} = ['--data-checksums'];
+$node->init(%params);
+
+$node->start();
+
+$ENV{PGOPTIONS} = '--client-min-messages=WARNING';
+
+my ($cmdret, $stdout, $stderr) = $node->psql('postgres', "SELECT"
+	. " current_setting('data_checksums')");
+
+is($stdout, 'on', 'Data checksums should be enabled');
+
+($cmdret, $stdout, $stderr) = $node->psql('postgres', "SELECT"
+	. " current_setting('block_size')");
+
+$BLOCKSIZE = $stdout;
+die("unexpected block size \"$BLOCKSIZE\"") if ($BLOCKSIZE !~ /^\d+$/);
+
+$node->safe_psql(
+	'postgres', q|
+	CREATE TABLE public.t1(id integer);
+	CREATE INDEX t1_id_idx ON public.t1 (id);
+	INSERT INTO public.t1 SELECT generate_series(1, 100);
+	CREATE VIEW public.v1 AS SELECT * FROM t1;
+	CREATE MATERIALIZED VIEW public.mv1 AS SELECT * FROM t1;
+	CREATE SEQUENCE public.s1;
+	CREATE UNLOGGED TABLE public.u_t1(id integer);
+	CREATE INDEX u_t1_id_idx ON public.u_t1 (id);
+	INSERT INTO public.u_t1 SELECT generate_series(1, 100);
+	CHECKPOINT;
+|);
+
+# Check sane behavior on various objects type, including those that don't have
+# a storage.
+is(check_checksums_call($node, 't1'), '1', 'Can check a table');
+is(check_checksums_call($node, 't1_id_idx'), '1', 'Can check an index');
+is(check_checksums_call($node, 'v1'), '', 'Cannot check a view');
+is(check_checksums_call($node, 'mv1'), '1', 'Can check a materialized view');
+is(check_checksums_call($node, 's1'), '1', 'Can check a sequence');
+is(check_checksums_call($node, 'u_t1'), '1', 'Can check an unlogged table');
+is(check_checksums_call($node, 'u_t1_id_idx'), '1', 'Can check an unlogged index');
+
+# get the underlying heap absolute path
+($cmdret, $stdout, $stderr) = $node->psql('postgres', "SELECT"
+	. " current_setting('data_directory') || '/' || pg_relation_filepath('t1')"
+);
+
+isnt($stdout, '', 'A relfilenode should be returned');
+
+my $filename = $stdout;
+
+check_checksums_nb_error($node, 0, qr/^$/);
+
+check_pg_stat_database_nb_error($node);
+
+my $fake_uint16 = hex '0x0000';
+
+# Test with a modified checksum. We use a zero checksum here as it's the only
+# one that cannot exist on a checksummed page. We also don't have an easy way
+# to compute what the checksum would be after a modification in a random place
+# in the block.
+corrupt_and_test_block($node, $filename, 0, $CHECKSUM_UINT16_OFFSET,
+	$fake_uint16);
+
+# Test corruption making the block looks like it's PageIsNew().
+corrupt_and_test_block($node, $filename, 0, $PD_UPPER_UINT16_OFFSET,
+	$fake_uint16);
diff --git a/src/tools/msvc/Mkvcbuild.pm b/src/tools/msvc/Mkvcbuild.pm
index 90594bd41b..adbabba3c4 100644
--- a/src/tools/msvc/Mkvcbuild.pm
+++ b/src/tools/msvc/Mkvcbuild.pm
@@ -50,7 +50,8 @@ my @contrib_excludes = (
 	'pgcrypto',         'sepgsql',
 	'brin',             'test_extensions',
 	'test_misc',        'test_pg_dump',
-	'snapshot_too_old', 'unsafe_tests');
+	'snapshot_too_old', 'unsafe_tests',
+	'check_relation');
 
 # Set of variables for frontend modules
 my $frontend_defines = { 'initdb' => 'FRONTEND' };
-- 
2.20.1