From 187d84a0ffc94fb9d5c9c0f6708227cc8f47fa3c Mon Sep 17 00:00:00 2001 From: Kyotaro Horiguchi Date: Thu, 25 Oct 2018 16:48:47 +0900 Subject: [PATCH 1/2] Make pg_verify_checksums conscious of unknown files --- src/bin/pg_verify_checksums/pg_verify_checksums.c | 225 +++++++++++++++++----- 1 file changed, 179 insertions(+), 46 deletions(-) diff --git a/src/bin/pg_verify_checksums/pg_verify_checksums.c b/src/bin/pg_verify_checksums/pg_verify_checksums.c index f0e09bea20..4b527913c1 100644 --- a/src/bin/pg_verify_checksums/pg_verify_checksums.c +++ b/src/bin/pg_verify_checksums/pg_verify_checksums.c @@ -26,6 +26,9 @@ static int64 files = 0; static int64 blocks = 0; static int64 badblocks = 0; +static int64 skipped_known = 0; +static int64 skipped_unknown = 0; +static int64 skipped_dirs = 0; static ControlFileData *ControlFile; static char *only_relfilenode = NULL; @@ -33,6 +36,46 @@ static bool verbose = false; static const char *progname; +/* struct for checksum verification parameters */ +typedef struct +{ + union + { + struct + { + BlockNumber segmentno; + } heap_param; + } params; +} checksum_scan_context; + +/* enum for return value of find_file_type */ +typedef enum +{ + ENTRY_TO_IGNORE, + DIR_TO_SCAN, + HEAP_TO_SCAN, + FILE_TO_SKIP, + DIR_TO_SKIP, + FILE_UNKNOWN +} checksum_file_types; + +/* black (explicit exclusion) list for checksum verification */ +static const char *const checksum_known_to_skip[] = { + "pg_control", + "pg_internal.init", + "pg_filenode.map", + "PG_VERSION", + "config_exec_params", + "config_exec_params.new", + "pgsql_tmp", /* this is a directory */ + NULL +}; + +static checksum_file_types find_file_type(const char *fn, + const char *relfilenode, + checksum_scan_context *ctx); + + static void usage(void) { @@ -116,11 +159,12 @@ isRelFileName(const char *fn) } static void -scan_file(const char *fn, BlockNumber segmentno) +scan_heap_file(const char *fn, checksum_scan_context *ctx) { PGAlignedBlock buf; PageHeader header = (PageHeader) buf.data; 
int f; + BlockNumber segmentno = ctx->params.heap_param.segmentno; BlockNumber blockno; f = open(fn, O_RDONLY | PG_BINARY, 0); @@ -187,63 +231,147 @@ scan_directory(const char *basedir, const char *subdir) while ((de = readdir(dir)) != NULL) { char fn[MAXPGPATH]; - struct stat st; - - if (!isRelFileName(de->d_name)) - continue; + checksum_scan_context ctx; snprintf(fn, sizeof(fn), "%s/%s", path, de->d_name); - if (lstat(fn, &st) < 0) + switch (find_file_type(fn, only_relfilenode, &ctx)) { - fprintf(stderr, _("%s: could not stat file \"%s\": %s\n"), - progname, fn, strerror(errno)); - exit(1); + case ENTRY_TO_IGNORE: + continue; /* ignore completely silently */ + case FILE_TO_SKIP: + if (verbose) + fprintf(stderr, "skipped file: %s/%s/%s\n", + basedir, subdir, de->d_name); + skipped_known++; + continue; + case DIR_TO_SKIP: + if (verbose) + fprintf(stderr, "skipped directory: %s/%s/%s\n", + basedir, subdir, de->d_name); + skipped_dirs++; + continue; + case FILE_UNKNOWN: + if (verbose) + fprintf(stderr, "skipped unknown file: %s/%s/%s\n", + basedir, subdir, de->d_name); + skipped_unknown++; + continue; + case HEAP_TO_SCAN: + scan_heap_file(fn, &ctx); + break; + case DIR_TO_SCAN: + scan_directory(path, de->d_name); + break; } - if (S_ISREG(st.st_mode)) + } + + closedir(dir); +} + +/* + * find_file_type: classify a directory entry and decide what to do with it + * + * fn is the path of the entry, absolute or relative to the current directory. + * relfilenode, if not NULL, restricts verification to relation files whose path + * ends with it (a filenode number or databaseid/filenodenum); others are skipped. + * ctx receives the segment number needed by the checksum scan of a relation file. 
+ */ +static checksum_file_types +find_file_type(const char *fn, const char *relfilenode, + checksum_scan_context *ctx) +{ + struct stat st; + char fnonly[MAXPGPATH]; + const char *fname; + char *forkpath; + char *segmentpath; + const char *const *p; + bool is_subdir = false; + + /* find the file name within the full path */ + fname = strrchr(fn, '/'); + if (fname) + fname++; + else + fname = fn; + + if (strcmp(fname, ".") == 0 || + strcmp(fname, "..") == 0) + return ENTRY_TO_IGNORE; + + if (lstat(fn, &st) < 0) + { + fprintf(stderr, _("%s: could not stat file \"%s\": %s\n"), + progname, fn, strerror(errno)); + exit(1); + } + +#ifndef WIN32 + if (S_ISDIR(st.st_mode) || S_ISLNK(st.st_mode)) +#else + if (S_ISDIR(st.st_mode) || pgwin32_is_junction(fn)) +#endif + is_subdir = true; + + /* excluded by blacklist */ + + for (p = checksum_known_to_skip ; *p ; p++) + { + if (strcmp(*p, fname) != 0) + continue; + + if (!is_subdir) + return FILE_TO_SKIP; + else + return DIR_TO_SKIP; + } + + if (is_subdir) + return DIR_TO_SCAN; + + /* relation files are the only kind we know how to verify */ + if (isRelFileName(fname)) + { + /* copy the path so that we can scribble on it */ + strlcpy(fnonly, fn, sizeof(fnonly)); + ctx->params.heap_param.segmentno = 0; + segmentpath = strrchr(fnonly, '.'); + + /* take the last dot, and only if it lies in the file name: directory names may contain dots too */ + if (segmentpath != NULL && strchr(segmentpath, '/') == NULL) { - char fnonly[MAXPGPATH]; - char *forkpath, - *segmentpath; - BlockNumber segmentno = 0; + *segmentpath++ = '\0'; + ctx->params.heap_param.segmentno = atoi(segmentpath); - /* - * Cut off at the segment boundary (".") to get the segment number - * in order to mix it into the checksum. Then also cut off at the - * fork boundary, to get the relfilenode the file belongs to for - * filtering. 
- */ - strlcpy(fnonly, de->d_name, sizeof(fnonly)); - segmentpath = strchr(fnonly, '.'); - if (segmentpath != NULL) - { - *segmentpath++ = '\0'; - segmentno = atoi(segmentpath); - if (segmentno == 0) - { - fprintf(stderr, _("%s: invalid segment number %d in file name \"%s\"\n"), - progname, segmentno, fn); - exit(1); - } - } + /* segment number 0 is invalid in a file name; treat the file as unknown */ + if (ctx->params.heap_param.segmentno == 0) + return FILE_UNKNOWN; + } + + if (relfilenode) + { + char *tail; - forkpath = strchr(fnonly, '_'); - if (forkpath != NULL) + /* find file suffix if any */ + forkpath = strrchr(fnonly, '_'); + + /* the underscore must be in the last segment in the path */ + if (forkpath != NULL && strchr(forkpath, '/') == NULL) *forkpath++ = '\0'; - if (only_relfilenode && strcmp(only_relfilenode, fnonly) != 0) + /* make a tail match with relfilenode; never form a pointer before fnonly */ + tail = strlen(fnonly) < strlen(relfilenode) ? NULL : fnonly + strlen(fnonly) - strlen(relfilenode); + if (tail == NULL || /* filter longer than the path: cannot match */ + (fnonly < tail && *(tail - 1) != '/') || /* match must start at a path component */ + strcmp(relfilenode, tail) != 0) /* Relfilenode not to be included */ - continue; - - scan_file(fn, segmentno); + return FILE_TO_SKIP; } -#ifndef WIN32 - else if (S_ISDIR(st.st_mode) || S_ISLNK(st.st_mode)) -#else - else if (S_ISDIR(st.st_mode) || pgwin32_is_junction(fn)) -#endif - scan_directory(path, de->d_name); + + return HEAP_TO_SCAN; } - closedir(dir); + + return FILE_UNKNOWN; } int @@ -359,6 +487,11 @@ main(int argc, char *argv[]) printf(_("Files scanned: %s\n"), psprintf(INT64_FORMAT, files)); printf(_("Blocks scanned: %s\n"), psprintf(INT64_FORMAT, blocks)); printf(_("Bad checksums: %s\n"), psprintf(INT64_FORMAT, badblocks)); + printf(_("Files skipped: %s\n"), psprintf(INT64_FORMAT, skipped_known)); + printf(_("Unknown files skipped: %s\n"), + psprintf(INT64_FORMAT, skipped_unknown)); + printf(_("Skipped directories: %s\n"), + psprintf(INT64_FORMAT, skipped_dirs)); if (badblocks > 0) return 1; -- 2.16.3