From 090dab37f8d3fa5a9b872d938e211bf8d69adab5 Mon Sep 17 00:00:00 2001
From: Julien Rouhaud
Date: Thu, 15 Oct 2020 13:20:17 +0800
Subject: [PATCH v14 2/2] Add a pg_check_relation() SQL function
This function checks the validity of the checksums for all non-dirty blocks of
a given relation, and optionally a given fork, and returns the list of all
blocks that don't match, along with the expected and found checksums.
Author: Julien Rouhaud
Reviewed-by: Michael Paquier, Masahiko Sawada, Justin Pryzby
Discussion: https://postgr.es/m/CAOBaU_aVvMjQn%3Dge5qPiJOPMmOj5%3Dii3st5Q0Y%2BWuLML5sR17w%40mail.gmail.com
---
doc/src/sgml/func.sgml | 51 ++++
src/backend/utils/adt/Makefile | 1 +
src/include/catalog/pg_proc.dat | 16 +
src/test/modules/Makefile | 1 +
src/test/modules/check_relation/.gitignore | 2 +
src/test/modules/check_relation/Makefile | 14 +
src/test/modules/check_relation/README | 23 ++
.../check_relation/t/001_checksums_check.pl | 276 ++++++++++++++++++
src/tools/msvc/Mkvcbuild.pm | 3 +-
9 files changed, 386 insertions(+), 1 deletion(-)
create mode 100644 src/test/modules/check_relation/.gitignore
create mode 100644 src/test/modules/check_relation/Makefile
create mode 100644 src/test/modules/check_relation/README
create mode 100644 src/test/modules/check_relation/t/001_checksums_check.pl
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index e7cff980dd..28663107c8 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -26217,6 +26217,57 @@ SELECT convert_from(pg_read_binary_file('file_in_utf8.txt'), 'UTF8');
+
+ Data Sanity Functions
+
+
+ The functions shown in
+ provide a means to check the health of data files in a cluster.
+
+
+
+ Data Sanity Functions
+
+
+ Name Return Type Description
+
+
+
+
+
+
+ pg_check_relation(relation regclass [, fork text])
+
+ setof record
+ Validate the checksum for all blocks of a relation.
+
+
+
+
+
+
+
+ pg_check_relation
+
+
+ pg_check_relation iterates over all blocks of a
+ given relation and verifies their checksums. If passed,
+ fork specifies that only checksums of the given
+ fork are to be verified. Fork should be 'main' for the
+ main data fork, 'fsm' for the free space map,
+ 'vm' for the visibility map, or
+ 'init' for the initialization fork.
+ The function returns a list of blocks for which the computed and stored
+ checksums don't match. See for
+ information on how to configure cost-based verification delay. You must be
+ a member of the pg_read_all_stats role to use this
+ function. It can only be used if data checksums are enabled. See for more information.
+
+
+
+
diff --git a/src/backend/utils/adt/Makefile b/src/backend/utils/adt/Makefile
index b4d55e849b..603f63afb6 100644
--- a/src/backend/utils/adt/Makefile
+++ b/src/backend/utils/adt/Makefile
@@ -22,6 +22,7 @@ OBJS = \
bool.o \
cash.o \
char.o \
+ checksumfuncs.o \
cryptohashes.o \
date.o \
datetime.o \
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 22340baf1c..9f4514d60f 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -10958,6 +10958,22 @@
proallargtypes => '{oid,text,int8,timestamptz}', proargmodes => '{i,o,o,o}',
proargnames => '{tablespace,name,size,modification}',
prosrc => 'pg_ls_tmpdir_1arg' },
+{ oid => '9147', descr => 'check data integrity for all forks of a relation',
+ proname => 'pg_check_relation', procost => '10000',
+ prorows => '20', proretset => 't', proparallel => 'r',
+ provolatile => 'v', prorettype => 'record', proargtypes => 'regclass',
+ proallargtypes => '{regclass,oid,int4,int8,int4,int4}',
+ proargmodes => '{i,o,o,o,o,o}',
+ proargnames => '{relation,relid,forknum,failed_blocknum,expected_checksum,found_checksum}',
+ prosrc => 'pg_check_relation' },
+{ oid => '9148', descr => 'check data integrity for one fork of a relation',
+ proname => 'pg_check_relation', procost => '10000',
+ prorows => '20', proretset => 't', proparallel => 'r',
+ provolatile => 'v', prorettype => 'record', proargtypes => 'regclass text',
+ proallargtypes => '{regclass,text,oid,int4,int8,int4,int4}',
+ proargmodes => '{i,i,o,o,o,o,o}',
+ proargnames => '{relation,fork,relid,forknum,failed_blocknum,expected_checksum,found_checksum}',
+ prosrc => 'pg_check_relation_fork' },
# hash partitioning constraint function
{ oid => '5028', descr => 'hash partition CHECK constraint',
diff --git a/src/test/modules/Makefile b/src/test/modules/Makefile
index a6d2ffbf9e..a845af71fd 100644
--- a/src/test/modules/Makefile
+++ b/src/test/modules/Makefile
@@ -7,6 +7,7 @@ include $(top_builddir)/src/Makefile.global
SUBDIRS = \
brin \
+ check_relation \
commit_ts \
delay_execution \
dummy_index_am \
dummy_seclabel \
diff --git a/src/test/modules/check_relation/.gitignore b/src/test/modules/check_relation/.gitignore
new file mode 100644
index 0000000000..871e943d50
--- /dev/null
+++ b/src/test/modules/check_relation/.gitignore
@@ -0,0 +1,2 @@
+# Generated by test suite
+/tmp_check/
diff --git a/src/test/modules/check_relation/Makefile b/src/test/modules/check_relation/Makefile
new file mode 100644
index 0000000000..a540cdece2
--- /dev/null
+++ b/src/test/modules/check_relation/Makefile
@@ -0,0 +1,14 @@
+# src/test/modules/check_relation/Makefile
+
+TAP_TESTS = 1
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = src/test/modules/check_relation
+top_builddir = ../../../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
diff --git a/src/test/modules/check_relation/README b/src/test/modules/check_relation/README
new file mode 100644
index 0000000000..415c4b21ad
--- /dev/null
+++ b/src/test/modules/check_relation/README
@@ -0,0 +1,23 @@
+src/test/modules/check_relation/README
+
+Regression tests for online checksums verification
+==================================================
+
+This directory contains a test suite for online checksums verification.
+
+Running the tests
+=================
+
+NOTE: You must have given the --enable-tap-tests argument to configure.
+
+Run
+ make check
+or
+ make installcheck
+You can use "make installcheck" if you previously did "make install".
+In that case, the code in the installation tree is tested. With
+"make check", a temporary installation tree is built from the current
+sources and then tested.
+
+Either way, this test initializes, starts, and stops a test Postgres
+cluster.
diff --git a/src/test/modules/check_relation/t/001_checksums_check.pl b/src/test/modules/check_relation/t/001_checksums_check.pl
new file mode 100644
index 0000000000..2a3f2880ea
--- /dev/null
+++ b/src/test/modules/check_relation/t/001_checksums_check.pl
@@ -0,0 +1,276 @@
+use strict;
+use warnings;
+
+use PostgresNode;
+use TestLib;
+use Test::More tests => 59;
+
+our $CHECKSUM_UINT16_OFFSET = 4;    # uint16 index of the checksum within a page
+our $PD_UPPER_UINT16_OFFSET = 7;    # uint16 index zeroed to make a page look PageIsNew()
+our $BLOCKSIZE;    # set from current_setting('block_size') once the node runs
+our $TOTAL_NB_ERR = 0;    # failures expected so far, compared to pg_stat_database
+
+sub get_block
+{
+	# Return block $blkno ($BLOCKSIZE raw bytes) of $filename, or die trying.
+	my ($filename, $blkno) = @_;
+	my $block;
+	open(my $infile, '<', $filename) or die "could not open $filename: $!";
+	binmode($infile);
+	# read()'s 4th argument is an offset into $block, not the file: seek.
+	seek($infile, $blkno * $BLOCKSIZE, 0) or die "could not seek: $!";
+	my $nread = read($infile, $block, $BLOCKSIZE);
+	die "could not read $filename: $!" if not defined $nread;
+	die "short read in $filename" if $nread != $BLOCKSIZE;
+	close($infile);
+	return($block);
+}
+
+sub overwrite_block
+{
+	# Write $block ($BLOCKSIZE bytes) over block $blkno of $filename.
+	my ($filename, $block, $blkno) = @_;
+
+	# '+<' updates in place; '>' would truncate every other block of the file.
+	open(my $outfile, '+<', $filename) or die "could not open $filename: $!";
+	binmode($outfile);
+	# syswrite()'s 4th argument is a buffer offset, so position with sysseek.
+	defined(sysseek($outfile, $blkno * $BLOCKSIZE, 0)) or die "seek error: $!";
+	my $nb = syswrite($outfile, $block, $BLOCKSIZE);
+
+	die($!) if not defined $nb;
+	die("Write error") if ($nb != $BLOCKSIZE);
+	close($outfile) or die "could not close $filename: $!";
+}
+
+sub get_uint16_from_page
+{
+	my ($block, $offset) = @_;    # $offset counts uint16s, not bytes
+	my @words = unpack("S*", $block);    # whole page as native uint16s
+	return (@words)[$offset];
+}
+
+sub set_uint16_to_page
+{
+	# Return a copy of $block whose uint16 at index $offset is set to $data.
+	my ($block, $data, $offset) = @_;
+	my ($byte0, $byte1) = unpack('C*', pack("S", $data));
+	my $first = 2 * $offset;
+
+	# vec() with 16 bits or more is always big-endian, so the two bytes of
+	# the native-endian value are stored one at a time.
+	vec($block, $first, 8) = $byte0;
+	vec($block, $first + 1, 8) = $byte1;
+	return $block;
+}
+
+sub check_checksums_call
+{
+	# Run pg_check_relation() on $relname; true iff psql's stderr is empty.
+	my ($node, $relname) = @_;
+	my $query = "SELECT COUNT(*)"
+	  . " FROM pg_catalog.pg_check_relation('$relname')";
+	my (undef, undef, $errors) = $node->psql('postgres', $query);
+
+	return ($errors eq '');
+}
+
+sub check_checksums_nb_error
+{
+	# Check the main fork of every table, index and materialized view: expect
+	# $nb rows back and a stderr matching $pattern.  Adds $nb to $TOTAL_NB_ERR.
+	my ($node, $nb, $pattern) = @_;
+	my $query = "SELECT COUNT(*)"
+	  . " FROM (SELECT pg_catalog.pg_check_relation(oid, 'main')"
+	  . " FROM pg_class WHERE relkind in ('r', 'i', 'm')) AS s";
+	my ($rc, $out, $err) = $node->psql('postgres', $query);
+
+	is($rc, 0, 'Function should run successfully');
+	like($err, $pattern, 'Error output should match expectations');
+	is($out, $nb, "Should have $nb error");
+	$TOTAL_NB_ERR += $nb;
+}
+
+sub check_pg_stat_database_nb_error
+{
+	# The checksum failures reported by pg_stat_database, summed over all
+	# its rows, must equal the running total kept in $TOTAL_NB_ERR.
+	my ($node) = @_;
+	my $query = "SELECT "
+	  . " sum(checksum_failures)"
+	  . " FROM pg_catalog.pg_stat_database";
+	my ($rc, $out, $err) = $node->psql('postgres', $query);
+	is($rc, 0, 'Function should run successfully');
+	is($err, '', 'Function should run successfully');
+	is($out, $TOTAL_NB_ERR, "Should have $TOTAL_NB_ERR error");
+}
+
+sub get_checksums_errors
+{
+	# Run pg_check_relation() without a fork argument on every table, index
+	# and materialized view, and return psql's stdout (the reported rows).
+	# stderr must match $pattern.  Adds $nb to $TOTAL_NB_ERR.
+	my ($node, $nb, $pattern) = @_;
+	my $query = "SELECT"
+	  . " relid::regclass::text, forknum, failed_blocknum,"
+	  . " expected_checksum, found_checksum"
+	  . " FROM (SELECT (pg_catalog.pg_check_relation(oid)).*"
+	  . " FROM pg_class WHERE relkind in ('r','i', 'm')) AS s";
+	my ($rc, $out, $err) = $node->psql('postgres', $query);
+
+	is($rc, '0', 'Function should run successfully');
+	like($err, $pattern, 'Error output should match expectations');
+	$TOTAL_NB_ERR += $nb;
+	return $out;
+}
+
+# Corrupt the uint16 at index $offset of block $blkno of $filename with
+# $fake_data, check that the corruption is reported for that block, and
+# finally restore the block to its original content and check again.
+sub corrupt_and_test_block
+{
+	my ($node, $filename, $blkno, $offset, $fake_data) = @_;
+
+	check_checksums_nb_error($node, 0, qr/^$/);
+
+	check_pg_stat_database_nb_error($node);
+	# The relation file is only modified while the node is stopped.
+	$node->stop();
+
+	my $original_block = get_block($filename, $blkno);
+	my $original_data = get_uint16_from_page($original_block, $offset);
+
+	isnt($original_data, $fake_data,
+		"The fake data at offset $offset should be different"
+		  . " from the existing one");
+
+	my $new_block = set_uint16_to_page($original_block, $fake_data, $offset);
+	isnt($original_data, get_uint16_from_page($new_block, $offset),
+		"The fake data at offset $offset should have been changed in memory");
+
+	overwrite_block($filename, $new_block, $blkno);
+	# Read the block back from disk to validate the write itself.
+	my $written_data = get_uint16_from_page(get_block($filename, $blkno), $offset);
+	isnt($original_data, $written_data,
+		"The data written at offset $offset should be different"
+		  . " from the original one");
+	is(get_uint16_from_page($new_block, $offset), $written_data,
+		"The data written at offset $offset should be the same"
+		  . " as the one in memory");
+	is($written_data, $fake_data,
+		"The data written at offset $offset should be the one"
+		  . " we wanted to write");
+
+	$node->start();
+
+	check_checksums_nb_error($node, 1, qr/invalid page in block $blkno/);
+
+	my $expected_checksum;
+	my $found_checksum = get_uint16_from_page($new_block,
+		$CHECKSUM_UINT16_OFFSET);
+	if ($offset == $PD_UPPER_UINT16_OFFSET)
+	{
+		# A checksum can't be computed if it's detected as PageIsNew(), so the
+		# function returns NULL for the computed checksum
+		$expected_checksum = '';
+	}
+	else
+	{
+		$expected_checksum = get_uint16_from_page($original_block,
+			$CHECKSUM_UINT16_OFFSET);
+	}
+
+	my $det = get_checksums_errors($node, 1, qr/invalid page in block $blkno/);
+	is($det, "t1|0|$blkno|$expected_checksum|$found_checksum",
+		"The checksums error for modification at offset $offset"
+		  . " should be detected");
+
+	$node->stop();
+
+	$new_block = set_uint16_to_page($original_block, $original_data, $offset);
+	is($original_data, get_uint16_from_page($new_block, $offset),
+		"The data at offset $offset should have been restored in memory");
+
+	overwrite_block($filename, $new_block, $blkno);
+	is($original_data, get_uint16_from_page(get_block($filename, $blkno),
+			$offset),
+		"The data at offset $offset should have been restored on disk");
+
+	$node->start();
+
+	check_checksums_nb_error($node, 0, qr/^$/);
+}
+
+if (exists $ENV{MY_PG_REGRESS})
+{
+ $ENV{PG_REGRESS} = $ENV{MY_PG_REGRESS};
+}
+# Create and start a test node initialized with --data-checksums.
+my $node = get_new_node('main');
+
+my %params;
+$params{'extra'} = ['--data-checksums'];
+$node->init(%params);
+
+$node->start();
+
+$ENV{PGOPTIONS} = '--client-min-messages=WARNING';
+
+my ($cmdret, $stdout, $stderr) = $node->psql('postgres', "SELECT"
+ . " current_setting('data_checksums')");
+
+is($stdout, 'on', 'Data checksums should be enabled');
+
+($cmdret, $stdout, $stderr) = $node->psql('postgres', "SELECT"
+ . " current_setting('block_size')");
+
+$BLOCKSIZE = $stdout;
+# Create the objects checked below, then issue a CHECKPOINT.
+$node->safe_psql(
+ 'postgres', q|
+ CREATE TABLE public.t1(id integer);
+ CREATE INDEX t1_id_idx ON public.t1 (id);
+ INSERT INTO public.t1 SELECT generate_series(1, 100);
+ CREATE VIEW public.v1 AS SELECT * FROM t1;
+ CREATE MATERIALIZED VIEW public.mv1 AS SELECT * FROM t1;
+ CREATE SEQUENCE public.s1;
+ CREATE UNLOGGED TABLE public.u_t1(id integer);
+ CREATE INDEX u_t1_id_idx ON public.u_t1 (id);
+ INSERT INTO public.u_t1 SELECT generate_series(1, 100);
+ CHECKPOINT;
+|);
+
+# Check sane behavior on various object types, including those that don't
+# have storage.
+is(check_checksums_call($node, 't1'), '1', 'Can check a table');
+is(check_checksums_call($node, 't1_id_idx'), '1', 'Can check an index');
+is(check_checksums_call($node, 'v1'), '', 'Cannot check a view');
+is(check_checksums_call($node, 'mv1'), '1', 'Can check a materialized view');
+is(check_checksums_call($node, 's1'), '1', 'Can check a sequence');
+is(check_checksums_call($node, 'u_t1'), '1', 'Can check an unlogged table');
+is(check_checksums_call($node, 'u_t1_id_idx'), '1', 'Can check an unlogged index');
+
+# get the underlying heap absolute path
+($cmdret, $stdout, $stderr) = $node->psql('postgres', "SELECT"
+ . " current_setting('data_directory') || '/' || pg_relation_filepath('t1')"
+);
+
+isnt($stdout, '', 'A relfilenode should be returned');
+
+my $filename = $stdout;
+
+check_checksums_nb_error($node, 0, qr/^$/);
+
+check_pg_stat_database_nb_error($node);
+
+my $fake_uint16 = hex '0x0000';
+
+# Test with a modified checksum. We use a zero checksum here as it's the only
+# one that cannot exist on a checksummed page. We also don't have an easy way
+# to compute what the checksum would be after a modification in a random place
+# in the block.
+corrupt_and_test_block($node, $filename, 0, $CHECKSUM_UINT16_OFFSET,
+ $fake_uint16);
+
+# Test corruption making the block look like it's PageIsNew().
+corrupt_and_test_block($node, $filename, 0, $PD_UPPER_UINT16_OFFSET,
+ $fake_uint16);
diff --git a/src/tools/msvc/Mkvcbuild.pm b/src/tools/msvc/Mkvcbuild.pm
index 90594bd41b..adbabba3c4 100644
--- a/src/tools/msvc/Mkvcbuild.pm
+++ b/src/tools/msvc/Mkvcbuild.pm
@@ -50,7 +50,8 @@ my @contrib_excludes = (
'pgcrypto', 'sepgsql',
'brin', 'test_extensions',
'test_misc', 'test_pg_dump',
- 'snapshot_too_old', 'unsafe_tests');
+ 'snapshot_too_old', 'unsafe_tests',
+ 'check_relation');
# Set of variables for frontend modules
my $frontend_defines = { 'initdb' => 'FRONTEND' };
--
2.20.1