From aef9d67db637d9e9bd7e9b7c2381e952a891e141 Mon Sep 17 00:00:00 2001 From: Bharath Rupireddy Date: Thu, 3 Feb 2022 04:13:00 +0000 Subject: [PATCH v7] Replace ReadDir with ReadDirExtended Replace ReadDir with ReadDirExtended (in CheckPointSnapBuild) and get rid of lstat entirely. We still use ReadDir in CheckPointLogicalRewriteHeap because unable to read directory would result a NULL from ReadDirExtended and we may miss to fsync the remaining map files, so here let's error out with ReadDir. With this change, the checkpoint will only care about the snapshot and mapping files and not fail if it finds other files in the directories. Removing lstat enables us to make things faster as we avoid extra system calls. Also, convert "could not parse filename" and "could not remove file" errors to LOG messages in CheckPointLogicalRewriteHeap. This will enable checkpoint not to waste the amount of work that it had done. --- src/backend/access/heap/rewriteheap.c | 27 ++++++++++++++++----- src/backend/replication/logical/snapbuild.c | 9 +------ 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/src/backend/access/heap/rewriteheap.c b/src/backend/access/heap/rewriteheap.c index 2a53826736..24e7a35881 100644 --- a/src/backend/access/heap/rewriteheap.c +++ b/src/backend/access/heap/rewriteheap.c @@ -1213,7 +1213,6 @@ CheckPointLogicalRewriteHeap(void) mappings_dir = AllocateDir("pg_logical/mappings"); while ((mapping_de = ReadDir(mappings_dir, "pg_logical/mappings")) != NULL) { - struct stat statbuf; Oid dboid; Oid relid; XLogRecPtr lsn; @@ -1227,26 +1226,42 @@ CheckPointLogicalRewriteHeap(void) continue; snprintf(path, sizeof(path), "pg_logical/mappings/%s", mapping_de->d_name); - if (lstat(path, &statbuf) == 0 && !S_ISREG(statbuf.st_mode)) - continue; /* Skip over files that cannot be ours. */ if (strncmp(mapping_de->d_name, "map-", 4) != 0) continue; + /* + * We just log a message if a file doesn't fit the pattern, it's + * probably some editors lock/state file or similar... + */ if (sscanf(mapping_de->d_name, LOGICAL_REWRITE_FORMAT, &dboid, &relid, &hi, &lo, &rewrite_xid, &create_xid) != 6) - elog(ERROR, "could not parse filename \"%s\"", mapping_de->d_name); + { + ereport(LOG, + (errmsg("could not parse file name \"%s\"", path))); + continue; + } lsn = ((uint64) hi) << 32 | lo; if (lsn < cutoff || cutoff == InvalidXLogRecPtr) { elog(DEBUG1, "removing logical rewrite file \"%s\"", path); + + /* + * It's not particularly harmful, though strange, if we can't + * remove the file here. Don't prevent the checkpoint from + * completing, that'd be a cure worse than the disease. + */ if (unlink(path) < 0) - ereport(ERROR, + { + ereport(LOG, (errcode_for_file_access(), - errmsg("could not remove file \"%s\": %m", path))); + errmsg("could not remove file \"%s\": %m", + path))); + continue; + } } else { diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c index 83fca8a77d..a4bf7a7fd9 100644 --- a/src/backend/replication/logical/snapbuild.c +++ b/src/backend/replication/logical/snapbuild.c @@ -1942,12 +1942,11 @@ CheckPointSnapBuild(void) cutoff = redo; snap_dir = AllocateDir("pg_logical/snapshots"); - while ((snap_de = ReadDir(snap_dir, "pg_logical/snapshots")) != NULL) + while ((snap_de = ReadDirExtended(snap_dir, "pg_logical/snapshots", LOG)) != NULL) { uint32 hi; uint32 lo; XLogRecPtr lsn; - struct stat statbuf; if (strcmp(snap_de->d_name, ".") == 0 || strcmp(snap_de->d_name, "..") == 0) @@ -1955,12 +1954,6 @@ CheckPointSnapBuild(void) snprintf(path, sizeof(path), "pg_logical/snapshots/%s", snap_de->d_name); - if (lstat(path, &statbuf) == 0 && !S_ISREG(statbuf.st_mode)) - { - elog(DEBUG1, "only regular files expected: %s", path); - continue; - } - /* * temporary filenames from SnapBuildSerialize() include the LSN and * everything but are postfixed by .$pid.tmp. We can just remove them -- 2.25.1