From 0da6d78690f8f45a8cf4e1e0caed8e53a9a3d010 Mon Sep 17 00:00:00 2001 From: Jeevan Chalke Date: Mon, 9 Sep 2019 10:56:43 +0530 Subject: [PATCH v2 2/4] Refactor code in basebackup.c - Refactor full backup code to the separate function. - Refactor checksum verifying logic to the separate function. - Refactor tablespace mapping code. --- src/backend/replication/basebackup.c | 348 +++++++++++++++++++--------------- src/bin/pg_basebackup/pg_basebackup.c | 129 +------------ src/fe_utils/simple_list.c | 104 ++++++++++ src/include/fe_utils/simple_list.h | 21 ++ 4 files changed, 334 insertions(+), 268 deletions(-) diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c index e72bf8e..bf15262 100644 --- a/src/backend/replication/basebackup.c +++ b/src/backend/replication/basebackup.c @@ -75,6 +75,14 @@ static void SendXlogRecPtrResult(XLogRecPtr ptr, TimeLineID tli); static int compareWalFileNames(const ListCell *a, const ListCell *b); static void throttle(size_t increment); static bool is_checksummed_file(const char *fullpath, const char *filename); +static void verify_page_checksum(const char *readfilename, FILE *fp, + char *page, int backup_distance, + BlockNumber blkno, int segmentno, + int *checksum_failures); +static pgoff_t sendCompleteFile(const char *readfilename, + const char *tarfilename, FILE *fp, + struct stat *statbuf, int segmentno, + bool verify_checksum, int *checksum_failures); /* Was the backup currently in-progress initiated in recovery mode? */ static bool backup_started_in_recovery = false; @@ -1391,17 +1399,11 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf bool missing_ok, Oid dboid) { FILE *fp; - BlockNumber blkno = 0; - bool block_retry = false; char buf[TAR_SEND_SIZE]; - uint16 checksum; int checksum_failures = 0; off_t cnt; - int i; pgoff_t len = 0; - char *page; size_t pad; - PageHeader phdr; int segmentno = 0; char *segmentpath; bool verify_checksum = false; @@ -1416,8 +1418,6 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf errmsg("could not open file \"%s\": %m", readfilename))); } - _tarWriteHeader(tarfilename, NULL, statbuf, false); - if (!noverify_checksums && DataChecksumsEnabled()) { char *filename; @@ -1449,146 +1449,9 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf } } - while ((cnt = fread(buf, 1, Min(sizeof(buf), statbuf->st_size - len), fp)) > 0) - { - /* - * The checksums are verified at block level, so we iterate over the - * buffer in chunks of BLCKSZ, after making sure that - * TAR_SEND_SIZE/buf is divisible by BLCKSZ and we read a multiple of - * BLCKSZ bytes. - */ - Assert(TAR_SEND_SIZE % BLCKSZ == 0); - - if (verify_checksum && (cnt % BLCKSZ != 0)) - { - ereport(WARNING, - (errmsg("cannot verify checksum in file \"%s\", block " - "%d: read buffer size %d and page size %d " - "differ", - readfilename, blkno, (int) cnt, BLCKSZ))); - verify_checksum = false; - } - - if (verify_checksum) - { - for (i = 0; i < cnt / BLCKSZ; i++) - { - page = buf + BLCKSZ * i; - - /* - * Only check pages which have not been modified since the - * start of the base backup. Otherwise, they might have been - * written only halfway and the checksum would not be valid. - * However, replaying WAL would reinstate the correct page in - * this case. We also skip completely new pages, since they - * don't have a checksum yet. - */ - if (!PageIsNew(page) && PageGetLSN(page) < startptr) - { - checksum = pg_checksum_page((char *) page, blkno + segmentno * RELSEG_SIZE); - phdr = (PageHeader) page; - if (phdr->pd_checksum != checksum) - { - /* - * Retry the block on the first failure. It's - * possible that we read the first 4K page of the - * block just before postgres updated the entire block - * so it ends up looking torn to us. We only need to - * retry once because the LSN should be updated to - * something we can ignore on the next pass. If the - * error happens again then it is a true validation - * failure. - */ - if (block_retry == false) - { - /* Reread the failed block */ - if (fseek(fp, -(cnt - BLCKSZ * i), SEEK_CUR) == -1) - { - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not fseek in file \"%s\": %m", - readfilename))); - } - - if (fread(buf + BLCKSZ * i, 1, BLCKSZ, fp) != BLCKSZ) - { - /* - * If we hit end-of-file, a concurrent - * truncation must have occurred, so break out - * of this loop just as if the initial fread() - * returned 0. We'll drop through to the same - * code that handles that case. (We must fix - * up cnt first, though.) - */ - if (feof(fp)) - { - cnt = BLCKSZ * i; - break; - } - - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not reread block %d of file \"%s\": %m", - blkno, readfilename))); - } - - if (fseek(fp, cnt - BLCKSZ * i - BLCKSZ, SEEK_CUR) == -1) - { - ereport(ERROR, - (errcode_for_file_access(), - errmsg("could not fseek in file \"%s\": %m", - readfilename))); - } - - /* Set flag so we know a retry was attempted */ - block_retry = true; - - /* Reset loop to validate the block again */ - i--; - continue; - } - - checksum_failures++; - - if (checksum_failures <= 5) - ereport(WARNING, - (errmsg("checksum verification failed in " - "file \"%s\", block %d: calculated " - "%X but expected %X", - readfilename, blkno, checksum, - phdr->pd_checksum))); - if (checksum_failures == 5) - ereport(WARNING, - (errmsg("further checksum verification " - "failures in file \"%s\" will not " - "be reported", readfilename))); - } - } - block_retry = false; - blkno++; - } - } - - /* Send the chunk as a CopyData message */ - if (pq_putmessage('d', buf, cnt)) - ereport(ERROR, - (errmsg("base backup could not send data, aborting backup"))); - - len += cnt; - throttle(cnt); - - if (feof(fp) || len >= statbuf->st_size) - { - /* - * Reached end of file. The file could be longer, if it was - * extended while we were sending it, but for a base backup we can - * ignore such extended data. It will be restored from WAL. - */ - break; - } - } - - CHECK_FREAD_ERROR(fp, readfilename); + /* Send complete file to the client. */ + len = sendCompleteFile(readfilename, tarfilename, fp, statbuf, segmentno, + verify_checksum, &checksum_failures); /* If the file was truncated while we were sending it, pad it with zeros */ if (len < statbuf->st_size) @@ -1761,3 +1624,192 @@ throttle(size_t increment) */ throttled_last = GetCurrentTimestamp(); } + +/* + * verify_page_checksum + * + * Verifies checksum for one page. + */ +static void +verify_page_checksum(const char *readfilename, FILE *fp, char *page, + int backup_distance, BlockNumber blkno, int segmentno, + int *checksum_failures) +{ + uint16 checksum; + bool block_retried = false; + PageHeader phdr; + + while (1) + { + /* + * Only check pages which have not been modified since the start of the + * base backup. Otherwise, they might have been written only halfway + * and the checksum would not be valid. However, replaying WAL would + * reinstate the correct page in this case. We also skip completely + * new pages, since they don't have a checksum yet. + */ + if (PageIsNew(page) || PageGetLSN(page) >= startptr) + return; + + checksum = pg_checksum_page(page, blkno + segmentno * RELSEG_SIZE); + phdr = (PageHeader) page; + if (phdr->pd_checksum == checksum) + return; + + /* + * Retry the block on the first failure. It's possible that we + * read the first 4K page of the block just before postgres updated + * the entire block so it ends up looking torn to us. We only need + * to retry once because the LSN should be updated to something we + * can ignore on the next pass. If the error happens again then it + * is a true validation failure. + */ + if (block_retried == false) + { + /* Reread the failed block */ + if (fseek(fp, -backup_distance, SEEK_CUR) == -1) + { + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not fseek in file \"%s\": %m", + readfilename))); + } + + if (fread(page, 1, BLCKSZ, fp) != BLCKSZ) + { + /* + * If we hit end-of-file, return from here. Caller will take + * care of the rest. + */ + if (feof(fp)) + return; + + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not reread block %d of file \"%s\": %m", + blkno, readfilename))); + } + + if (fseek(fp, backup_distance - BLCKSZ, SEEK_CUR) == -1) + { + ereport(ERROR, + (errcode_for_file_access(), + errmsg("could not fseek in file \"%s\": %m", + readfilename))); + } + + /* Set flag so we know a retry was attempted */ + block_retried = true; + + /* Re-validate the block again */ + continue; + } + + (*checksum_failures)++; + + if (*checksum_failures <= 5) + ereport(WARNING, + (errmsg("checksum verification failed in file \"%s\", block %d: calculated %X but expected %X", + readfilename, blkno, checksum, + phdr->pd_checksum))); + if (*checksum_failures == 5) + ereport(WARNING, + (errmsg("further checksum verification failures in file \"%s\" will not be reported", + readfilename))); + + /* + * Note that we want this loop to run only once, so we return from + * here. If we were needed to reread the block, then it was already + * done above. + */ + return; + } +} + +/* + * sendCompleteFile + * + * Sends complete file to the client. + */ +static pgoff_t +sendCompleteFile(const char *readfilename, const char *tarfilename, FILE *fp, + struct stat *statbuf, int segmentno, bool verify_checksum, + int *checksum_failures) +{ + char buf[TAR_SEND_SIZE]; + off_t cnt; + pgoff_t len = 0; + BlockNumber blkno = 0; + int i; + + _tarWriteHeader(tarfilename, NULL, statbuf, false); + + while ((cnt = fread(buf, 1, Min(sizeof(buf), statbuf->st_size - len), fp)) > 0) + { + /* + * The checksums are verified at block level, so we iterate over the + * buffer in chunks of BLCKSZ, after making sure that + * TAR_SEND_SIZE/buf is divisible by BLCKSZ and we read a multiple of + * BLCKSZ bytes. + */ + Assert(TAR_SEND_SIZE % BLCKSZ == 0); + + if (verify_checksum && (cnt % BLCKSZ != 0)) + { + ereport(WARNING, + (errmsg("cannot verify checksum in file \"%s\", block " + "%d: read buffer size %d and page size %d " + "differ", + readfilename, blkno, (int) cnt, BLCKSZ))); + verify_checksum = false; + } + + if (verify_checksum) + { + for (i = 0; i < cnt / BLCKSZ; i++) + { + int page_index_in_buf = (BLCKSZ * i); + + verify_page_checksum(readfilename, fp, buf + page_index_in_buf, + (cnt - page_index_in_buf), blkno, + segmentno, checksum_failures); + + /* + * If we hit end-of-file, a concurrent truncation must have + * occurred, so break out of this loop just as if the initial + * fread() returned 0. We'll drop through to the same code that + * handles that case. (We must fix up cnt first, though.) + */ + if (feof(fp)) + { + cnt = page_index_in_buf; + break; + } + + blkno++; + } + } + + /* Send the chunk as a CopyData message */ + if (pq_putmessage('d', buf, cnt)) + ereport(ERROR, + (errmsg("base backup could not send data, aborting backup"))); + + len += cnt; + throttle(cnt); + + if (feof(fp) || len >= statbuf->st_size) + { + /* + * Reached end of file. The file could be longer, if it was + * extended while we were sending it, but for a base backup we can + * ignore such extended data. It will be restored from WAL. + */ + break; + } + } + + CHECK_FREAD_ERROR(fp, readfilename); + + return len; +} diff --git a/src/bin/pg_basebackup/pg_basebackup.c b/src/bin/pg_basebackup/pg_basebackup.c index 1791853..8df453b 100644 --- a/src/bin/pg_basebackup/pg_basebackup.c +++ b/src/bin/pg_basebackup/pg_basebackup.c @@ -31,6 +31,7 @@ #include "common/file_utils.h" #include "common/logging.h" #include "common/string.h" +#include "fe_utils/simple_list.h" #include "fe_utils/string_utils.h" #include "getopt_long.h" #include "libpq-fe.h" @@ -43,19 +44,6 @@ #define ERRCODE_DATA_CORRUPTED "XX001" -typedef struct TablespaceListCell -{ - struct TablespaceListCell *next; - char old_dir[MAXPGPATH]; - char new_dir[MAXPGPATH]; -} TablespaceListCell; - -typedef struct TablespaceList -{ - TablespaceListCell *head; - TablespaceListCell *tail; -} TablespaceList; - /* * pg_xlog has been renamed to pg_wal in version 10. This version number * should be compared with PQserverVersion(). @@ -155,9 +143,6 @@ static void BaseBackup(void); static bool reached_end_position(XLogRecPtr segendpos, uint32 timeline, bool segment_finished); -static const char *get_tablespace_mapping(const char *dir); -static void tablespace_list_append(const char *arg); - static void cleanup_directories_atexit(void) @@ -227,84 +212,6 @@ kill_bgchild_atexit(void) } #endif -/* - * Split argument into old_dir and new_dir and append to tablespace mapping - * list. - */ -static void -tablespace_list_append(const char *arg) -{ - TablespaceListCell *cell = (TablespaceListCell *) pg_malloc0(sizeof(TablespaceListCell)); - char *dst; - char *dst_ptr; - const char *arg_ptr; - - dst_ptr = dst = cell->old_dir; - for (arg_ptr = arg; *arg_ptr; arg_ptr++) - { - if (dst_ptr - dst >= MAXPGPATH) - { - pg_log_error("directory name too long"); - exit(1); - } - - if (*arg_ptr == '\\' && *(arg_ptr + 1) == '=') - ; /* skip backslash escaping = */ - else if (*arg_ptr == '=' && (arg_ptr == arg || *(arg_ptr - 1) != '\\')) - { - if (*cell->new_dir) - { - pg_log_error("multiple \"=\" signs in tablespace mapping"); - exit(1); - } - else - dst = dst_ptr = cell->new_dir; - } - else - *dst_ptr++ = *arg_ptr; - } - - if (!*cell->old_dir || !*cell->new_dir) - { - pg_log_error("invalid tablespace mapping format \"%s\", must be \"OLDDIR=NEWDIR\"", arg); - exit(1); - } - - /* - * This check isn't absolutely necessary. But all tablespaces are created - * with absolute directories, so specifying a non-absolute path here would - * just never match, possibly confusing users. It's also good to be - * consistent with the new_dir check. - */ - if (!is_absolute_path(cell->old_dir)) - { - pg_log_error("old directory is not an absolute path in tablespace mapping: %s", - cell->old_dir); - exit(1); - } - - if (!is_absolute_path(cell->new_dir)) - { - pg_log_error("new directory is not an absolute path in tablespace mapping: %s", - cell->new_dir); - exit(1); - } - - /* - * Comparisons done with these values should involve similarly - * canonicalized path values. This is particularly sensitive on Windows - * where path values may not necessarily use Unix slashes. - */ - canonicalize_path(cell->old_dir); - canonicalize_path(cell->new_dir); - - if (tablespace_dirs.tail) - tablespace_dirs.tail->next = cell; - else - tablespace_dirs.head = cell; - tablespace_dirs.tail = cell; -} - #ifdef HAVE_LIBZ static const char * @@ -1359,28 +1266,6 @@ ReceiveTarFile(PGconn *conn, PGresult *res, int rownum) /* - * Retrieve tablespace path, either relocated or original depending on whether - * -T was passed or not. - */ -static const char * -get_tablespace_mapping(const char *dir) -{ - TablespaceListCell *cell; - char canon_dir[MAXPGPATH]; - - /* Canonicalize path for comparison consistency */ - strlcpy(canon_dir, dir, sizeof(canon_dir)); - canonicalize_path(canon_dir); - - for (cell = tablespace_dirs.head; cell; cell = cell->next) - if (strcmp(canon_dir, cell->old_dir) == 0) - return cell->new_dir; - - return dir; -} - - -/* * Receive a tar format stream from the connection to the server, and unpack * the contents of it into a directory. Only files, directories and * symlinks are supported, no other kinds of special files. @@ -1406,7 +1291,8 @@ ReceiveAndUnpackTarFile(PGconn *conn, PGresult *res, int rownum) strlcpy(current_path, basedir, sizeof(current_path)); else strlcpy(current_path, - get_tablespace_mapping(PQgetvalue(res, rownum, 1)), + get_tablespace_mapping(&tablespace_dirs, + PQgetvalue(res, rownum, 1)), sizeof(current_path)); /* @@ -1537,7 +1423,8 @@ ReceiveAndUnpackTarFile(PGconn *conn, PGresult *res, int rownum) */ filename[strlen(filename) - 1] = '\0'; /* Remove trailing slash */ - mapped_tblspc_path = get_tablespace_mapping(©buf[157]); + mapped_tblspc_path = get_tablespace_mapping(&tablespace_dirs, + ©buf[157]); if (symlink(mapped_tblspc_path, filename) != 0) { pg_log_error("could not create symbolic link from \"%s\" to \"%s\": %m", @@ -1966,7 +1853,9 @@ BaseBackup(void) */ if (format == 'p' && !PQgetisnull(res, i, 1)) { - char *path = unconstify(char *, get_tablespace_mapping(PQgetvalue(res, i, 1))); + char *path = unconstify(char *, + get_tablespace_mapping(&tablespace_dirs, + PQgetvalue(res, i, 1))); verify_dir_is_empty_or_create(path, &made_tablespace_dirs, &found_tablespace_dirs); } @@ -2276,7 +2165,7 @@ main(int argc, char **argv) no_slot = true; break; case 'T': - tablespace_list_append(optarg); + tablespace_list_append(&tablespace_dirs, optarg); break; case 'X': if (strcmp(optarg, "n") == 0 || diff --git a/src/fe_utils/simple_list.c b/src/fe_utils/simple_list.c index cfdb7dc..90ef114 100644 --- a/src/fe_utils/simple_list.c +++ b/src/fe_utils/simple_list.c @@ -16,6 +16,7 @@ */ #include "postgres_fe.h" +#include "common/logging.h" #include "fe_utils/simple_list.h" @@ -152,3 +153,106 @@ simple_string_list_not_touched(SimpleStringList *list) } return NULL; } + +/* + * Split argument into old_dir and new_dir and append to tablespace mapping + * list. + */ +void +tablespace_list_append(TablespaceList *tablespace_dirs, const char *arg) +{ + TablespaceListCell *cell = (TablespaceListCell *) pg_malloc0(sizeof(TablespaceListCell)); + char *dst; + char *dst_ptr; + const char *arg_ptr; + + Assert(tablespace_dirs); + + dst_ptr = dst = cell->old_dir; + for (arg_ptr = arg; *arg_ptr; arg_ptr++) + { + if (dst_ptr - dst >= MAXPGPATH) + { + pg_log_error("directory name too long"); + exit(1); + } + + if (*arg_ptr == '\\' && *(arg_ptr + 1) == '=') + ; /* skip backslash escaping = */ + else if (*arg_ptr == '=' && (arg_ptr == arg || *(arg_ptr - 1) != '\\')) + { + if (*cell->new_dir) + { + pg_log_error("multiple \"=\" signs in tablespace mapping"); + exit(1); + } + else + dst = dst_ptr = cell->new_dir; + } + else + *dst_ptr++ = *arg_ptr; + } + + if (!*cell->old_dir || !*cell->new_dir) + { + pg_log_error("invalid tablespace mapping format \"%s\", must be \"OLDDIR=NEWDIR\"", arg); + exit(1); + } + + /* + * This check isn't absolutely necessary. But all tablespaces are created + * with absolute directories, so specifying a non-absolute path here would + * just never match, possibly confusing users. It's also good to be + * consistent with the new_dir check. + */ + if (!is_absolute_path(cell->old_dir)) + { + pg_log_error("old directory is not an absolute path in tablespace mapping: %s", + cell->old_dir); + exit(1); + } + + if (!is_absolute_path(cell->new_dir)) + { + pg_log_error("new directory is not an absolute path in tablespace mapping: %s", + cell->new_dir); + exit(1); + } + + /* + * Comparisons done with these values should involve similarly + * canonicalized path values. This is particularly sensitive on Windows + * where path values may not necessarily use Unix slashes. + */ + canonicalize_path(cell->old_dir); + canonicalize_path(cell->new_dir); + + if (tablespace_dirs->tail) + tablespace_dirs->tail->next = cell; + else + tablespace_dirs->head = cell; + tablespace_dirs->tail = cell; +} + +/* + * Retrieve tablespace path, either relocated or original depending on whether + * -T was passed or not. + */ +const char * +get_tablespace_mapping(TablespaceList *tablespace_dirs, const char *dir) +{ + TablespaceListCell *cell; + char canon_dir[MAXPGPATH]; + + Assert(tablespace_dirs); + + /* Canonicalize path for comparison consistency */ + strlcpy(canon_dir, dir, sizeof(canon_dir)); + canonicalize_path(canon_dir); + + for (cell = tablespace_dirs->head; cell; cell = cell->next) + if (strcmp(canon_dir, cell->old_dir) == 0) + return cell->new_dir; + + return dir; +} diff --git a/src/include/fe_utils/simple_list.h b/src/include/fe_utils/simple_list.h index 75738be..436358b 100644 --- a/src/include/fe_utils/simple_list.h +++ b/src/include/fe_utils/simple_list.h @@ -6,6 +6,9 @@ * these is very primitive compared to the backend's List facilities, but * it's all we need in, eg, pg_dump. * + * Also, has data structures for simple lists of tablespace mappings used in + * backups. + * * * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California @@ -43,6 +46,19 @@ typedef struct SimpleStringList SimpleStringListCell *tail; } SimpleStringList; +typedef struct TablespaceListCell +{ + struct TablespaceListCell *next; + char old_dir[MAXPGPATH]; + char new_dir[MAXPGPATH]; +} TablespaceListCell; + +typedef struct TablespaceList +{ + TablespaceListCell *head; + TablespaceListCell *tail; +} TablespaceList; + extern void simple_oid_list_append(SimpleOidList *list, Oid val); extern bool simple_oid_list_member(SimpleOidList *list, Oid val); @@ -54,4 +70,9 @@ extern void simple_string_list_destroy(SimpleStringList *list); extern const char *simple_string_list_not_touched(SimpleStringList *list); +extern void tablespace_list_append(TablespaceList *tablespace_dirs, + const char *arg); +extern const char *get_tablespace_mapping(TablespaceList *tablespace_dirs, + const char *dir); + #endif /* SIMPLE_LIST_H */ -- 1.8.3.1