From 87baf73f56f688f4532ef78f6684934a47be3ba2 Mon Sep 17 00:00:00 2001 From: Kyotaro Horiguchi Date: Thu, 25 Oct 2018 16:49:46 +0900 Subject: [PATCH 2/2] Common'ize file type checker for checksums pg_verify_checksums.c and basebackup.c have the same notion of 'files that have checksums'. This patch moves the core logic into src/common so that both files share it. --- src/backend/replication/basebackup.c | 43 +++-- src/bin/pg_verify_checksums/Makefile | 3 +- src/bin/pg_verify_checksums/pg_verify_checksums.c | 220 +--------------------- src/common/Makefile | 3 +- src/common/file_checksums.c | 197 +++++++++++++++++++ src/include/common/file_checksums.h | 42 +++++ 6 files changed, 273 insertions(+), 235 deletions(-) create mode 100644 src/common/file_checksums.c create mode 100644 src/include/common/file_checksums.h diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c index b20f6c379c..4ebc969f3d 100644 --- a/src/backend/replication/basebackup.c +++ b/src/backend/replication/basebackup.c @@ -19,6 +19,7 @@ #include "access/xlog_internal.h" /* for pg_start/stop_backup */ #include "catalog/pg_type.h" #include "common/file_perm.h" +#include "common/file_checksums.h" #include "lib/stringinfo.h" #include "libpq/libpq.h" #include "libpq/pqformat.h" @@ -187,18 +188,6 @@ static const char *excludeFiles[] = NULL }; -/* - * List of files excluded from checksum validation. - */ -static const char *const noChecksumFiles[] = { - "pg_control", - "pg_filenode.map", - "pg_internal.init", - "PG_VERSION", - NULL, -}; - - /* * Called when ERROR or FATAL happens in perform_base_backup() after * we have started the backup - make sure we end it!
@@ -1321,22 +1310,36 @@ sendDir(const char *path, int basepathlen, bool sizeonly, List *tablespaces, static bool is_checksummed_file(const char *fullpath, const char *filename) { - const char *const *f; + checksum_scan_context ctx; /* Check that the file is in a tablespace */ if (strncmp(fullpath, "./global/", 9) == 0 || strncmp(fullpath, "./base/", 7) == 0 || strncmp(fullpath, "/", 1) == 0) { - /* Compare file against noChecksumFiles skiplist */ - for (f = noChecksumFiles; *f; f++) - if (strcmp(*f, filename) == 0) - return false; + /* check if the file has checksums */ + switch (checksum_find_file_type(fullpath, NULL, &ctx)) + { + case HEAP_TO_SCAN: + return true; + case STAT_FAILED: + ereport(ERROR, + (errcode_for_file_access(), + errmsg("failed to stat \"%s\": %m", + fullpath))); - return true; + case ENTRY_TO_IGNORE: + case FILE_TO_SKIP: + case DIR_TO_SKIP: + case DIR_TO_SCAN: + break; + case FILE_UNKNOWN: + elog(DEBUG1, "checksum verification was skipped for unknown file: %s", fullpath); + break; + } } - else - return false; + + return false; } /***** diff --git a/src/bin/pg_verify_checksums/Makefile b/src/bin/pg_verify_checksums/Makefile index cfe4ab1b8b..3d0a9baf24 100644 --- a/src/bin/pg_verify_checksums/Makefile +++ b/src/bin/pg_verify_checksums/Makefile @@ -15,7 +15,8 @@ subdir = src/bin/pg_verify_checksums top_builddir = ../../.. 
include $(top_builddir)/src/Makefile.global -OBJS= pg_verify_checksums.o $(WIN32RES) +OBJS= pg_verify_checksums.o $(top_builddir)/src/common/file_checksums.o \ + $(WIN32RES) all: pg_verify_checksums diff --git a/src/bin/pg_verify_checksums/pg_verify_checksums.c b/src/bin/pg_verify_checksums/pg_verify_checksums.c index 4b527913c1..dc2143ea65 100644 --- a/src/bin/pg_verify_checksums/pg_verify_checksums.c +++ b/src/bin/pg_verify_checksums/pg_verify_checksums.c @@ -15,7 +15,7 @@ #include "catalog/pg_control.h" #include "common/controldata_utils.h" -#include "common/relpath.h" +#include "common/file_checksums.h" #include "getopt_long.h" #include "pg_getopt.h" #include "storage/bufpage.h" @@ -36,46 +36,6 @@ static bool verbose = false; static const char *progname; -/* struct for checksum verification paremter*/ -typedef struct -{ - union - { - struct - { - BlockNumber segmentno; - } heap_param; - } params; -} checksum_scan_context; - -/* enum for return value of find_file_type */ -typedef enum -{ - ENTRY_TO_IGNORE, - DIR_TO_SCAN, - HEAP_TO_SCAN, - FILE_TO_SKIP, - DIR_TO_SKIP, - FILE_UNKNOWN -} checksum_file_types; - -/* black (explisit exclusion) list for checksum verification */ -static const char *const checksum_known_to_skip[] = { - "pg_control", - "pg_internal.init", - "pg_filenode.map", - "PG_VERSION", - "config_exec_params", - "config_exec_params.new", - "pgsql_tmp", /* this is a directory */ - NULL -}; - -static checksum_file_types find_file_type(const char *fn, - const char *relfilenode, - checksum_scan_context *ctx); - - static void usage(void) { @@ -93,71 +53,6 @@ usage(void) printf(_("Report bugs to .\n")); } -/* - * isRelFileName - * - * Check if the given file name is authorized for checksum verification. - */ -static bool -isRelFileName(const char *fn) -{ - int pos; - - /*---------- - * Only files including data checksums are authorized for verification. 
- * This is guessed based on the file name by reverse-engineering - * GetRelationPath() so make sure to update both code paths if any - * updates are done. The following file name formats are allowed: - * - * . - * _ - * _. - * - * Note that temporary files, beginning with 't', are also skipped. - * - *---------- - */ - - /* A non-empty string of digits should follow */ - for (pos = 0; isdigit((unsigned char) fn[pos]); ++pos) - ; - /* leave if no digits */ - if (pos == 0) - return false; - /* good to go if only digits */ - if (fn[pos] == '\0') - return true; - - /* Authorized fork files can be scanned */ - if (fn[pos] == '_') - { - int forkchar = forkname_chars(&fn[pos + 1], NULL); - - if (forkchar <= 0) - return false; - - pos += forkchar + 1; - } - - /* Check for an optional segment number */ - if (fn[pos] == '.') - { - int segchar; - - for (segchar = 1; isdigit((unsigned char) fn[pos + segchar]); ++segchar) - ; - - if (segchar <= 1) - return false; - pos += segchar; - } - - /* Now this should be the end */ - if (fn[pos] != '\0') - return false; - return true; -} - static void scan_heap_file(const char *fn, checksum_scan_context *ctx) { @@ -234,7 +129,7 @@ scan_directory(const char *basedir, const char *subdir) checksum_scan_context ctx; snprintf(fn, sizeof(fn), "%s/%s", path, de->d_name); - switch (find_file_type(fn, only_relfilenode, &ctx)) + switch (checksum_find_file_type(fn, only_relfilenode, &ctx)) { case ENTRY_TO_IGNORE: continue; /* ignore completely silently */ @@ -262,118 +157,16 @@ scan_directory(const char *basedir, const char *subdir) case DIR_TO_SCAN: scan_directory(path, de->d_name); break; + case STAT_FAILED: + fprintf(stderr, _("%s: could not stat file \"%s\": %s\n"), + progname, fn, strerror(errno)); + exit(1); } } closedir(dir); } -/* - * find_file_type: identify what to do on a file - * - * fn is a file path in full path or relative down from the current directory. - * relfilenode is filter string of file. 
Only specified files of node number or - * databaseid/filenodenum will be verified checksum. - * ctx is the parameter needed for following checksum scan. - */ -static checksum_file_types -find_file_type(const char *fn, const char *relfilenode, - checksum_scan_context *ctx) -{ - struct stat st; - char fnonly[MAXPGPATH]; - const char *fname; - char *forkpath; - char *segmentpath; - const char *const *p; - bool is_subdir = false; - - /* find file name the full path */ - fname = strrchr(fn, '/'); - if (fname) - fname++; - else - fname = fn; - - if (strcmp(fname, ".") == 0 || - strcmp(fname, "..") == 0) - return ENTRY_TO_IGNORE; - - if (lstat(fn, &st) < 0) - { - fprintf(stderr, _("%s: could not stat file \"%s\": %s\n"), - progname, fn, strerror(errno)); - exit(1); - } - -#ifndef WIN32 - if (S_ISDIR(st.st_mode) || S_ISLNK(st.st_mode)) -#else - if (S_ISDIR(st.st_mode) || pgwin32_is_junction(fn)) -#endif - is_subdir = true; - - /* exluded by blacklist */ - - for (p = checksum_known_to_skip ; *p ; p++) - { - if (strcmp(*p, fname) != 0) - continue; - - if (!is_subdir) - return FILE_TO_SKIP; - else - return DIR_TO_SKIP; - } - - if (is_subdir) - return DIR_TO_SCAN; - - /* we now know only of relfiles */ - if (isRelFileName(fname)) - { - /* copy the path so that we can scribble on it */ - strlcpy(fnonly, fn, sizeof(fnonly)); - ctx->params.heap_param.segmentno = 0; - segmentpath = strchr(fnonly, '.'); - - /* make sure that the dot is in the last segment in the path */ - if (segmentpath != NULL && strchr(segmentpath, '/') == NULL) - { - *segmentpath++ = '\0'; - ctx->params.heap_param.segmentno = atoi(segmentpath); - - /* something's wrong, treat it as unknown file */ - if (ctx->params.heap_param.segmentno == 0) - return FILE_UNKNOWN; - } - - if (only_relfilenode) - { - char *p; - - /* find file suffix if any */ - forkpath = strrchr(fnonly, '_'); - - /* the underscore must be in the last segment in the path */ - if (forkpath != NULL && strchr(forkpath, '/') == NULL) - *forkpath++ 
= '\0'; - - /* make a tail match with only_relfilenode */ - p = fnonly + strlen(fnonly) - strlen(relfilenode); - if (fnonly > p || /* cannot match*/ - (fnonly < p && *(p-1) != '/') || /* avoid false match */ - strcmp(relfilenode, p) != 0) - /* Relfilenode not to be included */ - return FILE_TO_SKIP; - } - - return HEAP_TO_SCAN; - } - - return FILE_UNKNOWN; -} - int main(int argc, char *argv[]) { @@ -397,6 +190,7 @@ main(int argc, char *argv[]) if (strcmp(argv[1], "--help") == 0 || strcmp(argv[1], "-?") == 0) { usage(); + exit(0); } if (strcmp(argv[1], "--version") == 0 || strcmp(argv[1], "-V") == 0) diff --git a/src/common/Makefile b/src/common/Makefile index ec8139f014..54b7c9f440 100644 --- a/src/common/Makefile +++ b/src/common/Makefile @@ -44,7 +44,8 @@ override CPPFLAGS += -DVAL_LIBS="\"$(LIBS)\"" override CPPFLAGS := -DFRONTEND $(CPPFLAGS) LIBS += $(PTHREAD_LIBS) -OBJS_COMMON = base64.o config_info.o controldata_utils.o exec.o file_perm.o \ +OBJS_COMMON = base64.o config_info.o controldata_utils.o exec.o \ + file_checksums.o file_perm.o \ ip.o keywords.o link-canary.o md5.o pg_lzcompress.o \ pgfnames.o psprintf.o relpath.o \ rmtree.o saslprep.o scram-common.o string.o unicode_norm.o \ diff --git a/src/common/file_checksums.c b/src/common/file_checksums.c new file mode 100644 index 0000000000..f83bb52c1d --- /dev/null +++ b/src/common/file_checksums.c @@ -0,0 +1,197 @@ +/*------------------------------------------------------------------------- + * file_checksums.c + * checksumming files + * + * This implements identification of the files that carry data checksums, + * shared by basebackup.c and pg_verify_checksums.c.
+ * + * Portions Copyright (c) 2018, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/common/file_checksums.c + * + *------------------------------------------------------------------------- + */ +#include <sys/stat.h> + +#include "c.h" +#include "common/file_checksums.h" +#include "common/relpath.h" + +/* blacklist (explicit exclusion list) for checksum verification */ +static const char *const checksum_known_to_skip[] = { + "pg_control", + "pg_internal.init", + "pg_filenode.map", + "PG_VERSION", + "config_exec_params", + "config_exec_params.new", + "pgsql_tmp", /* directory */ + NULL +}; + +/* + * isRelFileName + * + * Check if the given file name is authorized for checksum verification. + */ +static bool +isRelFileName(const char *fn) +{ + int pos; + + /*---------- + * Only files including data checksums are authorized for verification. + * This is guessed based on the file name by reverse-engineering + * GetRelationPath() so make sure to update both code paths if any + * updates are done. The following file name formats are allowed: + * <digits>.<segment> + * <digits>_<forkname> + * <digits>_<forkname>.<segment> + * + * Note that temporary files, beginning with 't', are also skipped.
+ * + *---------- + */ + + /* A non-empty string of digits should follow */ + for (pos = 0; isdigit((unsigned char) fn[pos]); ++pos) + ; + /* leave if no digits */ + if (pos == 0) + return false; + /* good to go if only digits */ + if (fn[pos] == '\0') + return true; + + /* Authorized fork files can be scanned */ + if (fn[pos] == '_') + { + int forkchar = forkname_chars(&fn[pos + 1], NULL); + + if (forkchar <= 0) + return false; + + pos += forkchar + 1; + } + + /* Check for an optional segment number */ + if (fn[pos] == '.') + { + int segchar; + + for (segchar = 1; isdigit((unsigned char) fn[pos + segchar]); ++segchar) + ; + + if (segchar <= 1) + return false; + pos += segchar; + } + + /* Now this should be the end */ + if (fn[pos] != '\0') + return false; + return true; +} + +/* + * checksum_find_file_type: classify a file for checksum verification + * + * fn is the path of the file to check; a path without any '/' is ignored. + * relfilenode, if not NULL, limits scanning to files of that relfilenode. + * ctx receives the parameters (segment number) the checksum scanner needs.
+ */ +checksum_file_types +checksum_find_file_type(const char *fn, + const char *relfilenode, checksum_scan_context *ctx) +{ + struct stat st; + char fnonly[MAXPGPATH]; + char *fname; + char *forkpath; + char *segmentpath; + const char *const *p; + bool is_subdir = false; + + fname = strrchr(fn, '/'); + + if (fname == NULL) + return ENTRY_TO_IGNORE; + + fname++; + + if (strcmp(fname, ".") == 0 || + strcmp(fname, "..") == 0) + return ENTRY_TO_IGNORE; + + if (lstat(fn, &st) < 0) + return STAT_FAILED; + +#ifndef WIN32 + if (S_ISDIR(st.st_mode) || S_ISLNK(st.st_mode)) +#else + if (S_ISDIR(st.st_mode) || pgwin32_is_junction(fn)) +#endif + is_subdir = true; + + /* excluded by blacklist */ + + for (p = checksum_known_to_skip ; *p ; p++) + { + if (strcmp(*p, fname) != 0) + continue; + + if (is_subdir) + return DIR_TO_SKIP; + + return FILE_TO_SKIP; + } + + if (is_subdir) + return DIR_TO_SCAN; + + /* the only files we know how to verify are relation files */ + if (isRelFileName(fname)) + { + /* copy the path so that we can scribble on it */ + strlcpy(fnonly, fn, sizeof(fnonly)); + ctx->params.heap_param.segmentno = 0; + segmentpath = strchr(fnonly, '.'); + + /* make sure that the dot is in the last segment in the path */ + if (segmentpath != NULL && strchr(segmentpath, '/') == NULL) + { + *segmentpath++ = '\0'; + ctx->params.heap_param.segmentno = atoi(segmentpath); + + /* something's wrong, treat it as unknown file */ + if (ctx->params.heap_param.segmentno == 0) + return FILE_UNKNOWN; + } + + if (relfilenode) + { + char *p; + + /* strip the fork suffix, if any */ + forkpath = strrchr(fnonly, '_'); + + /* the underscore must be in the last segment in the path */ + if (forkpath != NULL && strchr(forkpath, '/') == NULL) + *forkpath++ = '\0'; + + /* make a tail match with relfilenode */ + p = fnonly + strlen(fnonly) - strlen(relfilenode); + if (fnonly > p || /* cannot match */ + (fnonly < p && *(p-1) != '/') || /* avoid false match */ + strcmp(relfilenode, p) != 0) + /* Relfilenode not to be included */ +
return FILE_TO_SKIP; + } + + return HEAP_TO_SCAN; + } + + return FILE_UNKNOWN; +} diff --git a/src/include/common/file_checksums.h b/src/include/common/file_checksums.h new file mode 100644 index 0000000000..3ead25c97f --- /dev/null +++ b/src/include/common/file_checksums.h @@ -0,0 +1,42 @@ +/* + * file_checksums.h + * checksumming files + * + * Copyright (c) 2018, PostgreSQL Global Development Group + * + * src/include/common/file_checksums.h + */ +#ifndef FILE_CHECKSUMS_H +#define FILE_CHECKSUMS_H + +#include "storage/block.h" + +/* struct for checksum verification parameters */ +typedef struct +{ + union + { + struct + { + BlockNumber segmentno; + } heap_param; + } params; +} checksum_scan_context; + +/* enum for return value of checksum_find_file_type */ +typedef enum +{ + ENTRY_TO_IGNORE, + DIR_TO_SCAN, + HEAP_TO_SCAN, + FILE_TO_SKIP, + DIR_TO_SKIP, + FILE_UNKNOWN, + STAT_FAILED +} checksum_file_types; + +checksum_file_types checksum_find_file_type(const char *fn, + const char *relfilenode, + checksum_scan_context *ctx); + +#endif /* FILE_CHECKSUMS_H */ -- 2.16.3