From 740c153a712939d6c65f7d31592b75512544f383 Mon Sep 17 00:00:00 2001 From: Jeevan Chalke Date: Fri, 16 Aug 2019 14:10:33 +0530 Subject: [PATCH 3/4] Add support for the incremental backup. If file is modified 90% or more, we send a whole file else we send only those blocks which are modified. The file is named .partial and has following header details: - magic number, currently 0 (4 bytes) - checksum, currently 0 (4 bytes) - number of blocks in this .partial file (4 bytes) - all modified block numbers (4 bytes each) - modified blocks --- doc/src/sgml/protocol.sgml | 50 ++++++- doc/src/sgml/ref/pg_basebackup.sgml | 21 +++ src/backend/access/transam/xlog.c | 5 +- src/backend/access/transam/xlogfuncs.c | 6 +- src/backend/replication/basebackup.c | 244 +++++++++++++++++++++++++++++++-- src/backend/storage/file/fd.c | 29 ++++ src/include/access/xlog.h | 3 +- src/include/replication/basebackup.h | 13 ++ src/include/storage/fd.h | 1 + 9 files changed, 354 insertions(+), 18 deletions(-) diff --git a/doc/src/sgml/protocol.sgml b/doc/src/sgml/protocol.sgml index b20f169..fb07585 100644 --- a/doc/src/sgml/protocol.sgml +++ b/doc/src/sgml/protocol.sgml @@ -2466,7 +2466,7 @@ The commands accepted in replication mode are: - BASE_BACKUP [ LABEL 'label' ] [ PROGRESS ] [ FAST ] [ WAL ] [ NOWAIT ] [ MAX_RATE rate ] [ TABLESPACE_MAP ] [ NOVERIFY_CHECKSUMS ] + BASE_BACKUP [ LABEL 'label' ] [ PROGRESS ] [ FAST ] [ WAL ] [ NOWAIT ] [ MAX_RATE rate ] [ TABLESPACE_MAP ] [ NOVERIFY_CHECKSUMS ] [ LSN 'lsn' ] BASE_BACKUP @@ -2576,6 +2576,22 @@ The commands accepted in replication mode are: + + + LSN 'lsn' + + + Includes only those data blocks in backup which has LSN greater than + or equal to the given lsn. However, if 90% or more data blocks are + modified in the file, then sends the entire file. Otherwise, creates + a .partial file containing only the blocks which + are modified and sends that instead. The .partial + file has its own header followed by the actual data blocks. Note that + only relation files are considered here, all other files are sent as + is. + + + @@ -2698,6 +2714,38 @@ The commands accepted in replication mode are: Owner, group, and file mode are set if the underlying file system on the server supports it. + + An incremental backup's .partial file has the + following format: + + + + Starts with a 4-byte magic number + + + + + Followed by a 4-byte CRC of the header (containing a magic number, + count of the number of blocks, and all block numbers) + + + + + Then a 4-byte count of the number of blocks included in the file + + + + + Then the block numbers, each as a 4-byte quantity + + + + + Followed by the actual data blocks in order with the block numbers + + + + diff --git a/doc/src/sgml/ref/pg_basebackup.sgml b/doc/src/sgml/ref/pg_basebackup.sgml index fc9e222..00782e0 100644 --- a/doc/src/sgml/ref/pg_basebackup.sgml +++ b/doc/src/sgml/ref/pg_basebackup.sgml @@ -408,6 +408,19 @@ PostgreSQL documentation + + + + Takes an incremental backup, using LSN as a threshold. Only the blocks + which are modified after this given LSN will be backed up. The file + which has these partial blocks has .partial as an extension. Backup + taken in this manner has to be combined with the full backup with the + pg_combinebackup utility. + + + + + @@ -792,6 +805,14 @@ PostgreSQL documentation $ pg_basebackup -D backup/data -T /opt/ts=$(pwd)/backup/ts + + + To create an incremental backup having LSN greater than or equal to + 5/19000060: + +$ pg_basebackup -D incbackup --lsn='5/19000060' + + diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index f553523..e427c0f 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -10178,7 +10178,7 @@ XLogRecPtr do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p, StringInfo labelfile, List **tablespaces, StringInfo tblspcmapfile, bool infotbssize, - bool needtblspcmapfile) + bool needtblspcmapfile, XLogRecPtr ref_lsn) { bool exclusive = (labelfile == NULL); bool backup_started_in_recovery = false; @@ -10506,6 +10506,9 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p, appendStringInfo(labelfile, "START TIME: %s\n", strfbuf); appendStringInfo(labelfile, "LABEL: %s\n", backupidstr); appendStringInfo(labelfile, "START TIMELINE: %u\n", starttli); + if (!XLogRecPtrIsInvalid(ref_lsn)) + appendStringInfo(labelfile, "INCREMENTAL BACKUP REFERENCE WAL LOCATION: %X/%X\n", + (uint32) (ref_lsn >> 32), (uint32) ref_lsn); /* * Okay, write the file, or return its contents to caller. diff --git a/src/backend/access/transam/xlogfuncs.c b/src/backend/access/transam/xlogfuncs.c index b35043b..ef8b283 100644 --- a/src/backend/access/transam/xlogfuncs.c +++ b/src/backend/access/transam/xlogfuncs.c @@ -89,7 +89,8 @@ pg_start_backup(PG_FUNCTION_ARGS) if (exclusive) { startpoint = do_pg_start_backup(backupidstr, fast, NULL, NULL, - NULL, NULL, false, true); + NULL, NULL, false, true, + InvalidXLogRecPtr); } else { @@ -105,7 +106,8 @@ pg_start_backup(PG_FUNCTION_ARGS) MemoryContextSwitchTo(oldcontext); startpoint = do_pg_start_backup(backupidstr, fast, NULL, label_file, - NULL, tblspc_map_file, false, true); + NULL, tblspc_map_file, false, true, + InvalidXLogRecPtr); before_shmem_exit(nonexclusive_base_backup_cleanup, (Datum) 0); } diff --git a/src/backend/replication/basebackup.c b/src/backend/replication/basebackup.c index 18e992c..a2c0756 100644 --- a/src/backend/replication/basebackup.c +++ b/src/backend/replication/basebackup.c @@ -82,6 +82,12 @@ static pgoff_t do_full_backup(const char *readfilename, const char *tarfilename, FILE *fp, struct stat *statbuf, int segmentno, bool verify_checksum, int *checksum_failures); +static pgoff_t do_incremental_backup(const char *readfilename, + const char *tarfilename, FILE *fp, + XLogRecPtr refptr, + struct stat *statbuf, int segmentno, + bool verify_checksum, + int *checksum_failures); /* Was the backup currently in-progress initiated in recovery mode? */ static bool backup_started_in_recovery = false; @@ -99,6 +105,11 @@ static char *statrelpath = NULL; */ #define THROTTLING_FREQUENCY 8 +/* + * When to send the whole file, % blocks modified (90%) + */ +#define WHOLE_FILE_THRESHOLD 0.9 + /* The actual number of bytes, transfer of which may cause sleep. */ static uint64 throttling_sample; @@ -114,6 +125,9 @@ static TimestampTz throttled_last; /* The starting XLOG position of the base backup. */ static XLogRecPtr startptr; +/* The reference XLOG position for the incremental backup. */ +static XLogRecPtr refptr; + /* Total number of checksum failures during base backup. */ static long long int total_checksum_failures; @@ -254,7 +268,9 @@ perform_base_backup(basebackup_options *opt) startptr = do_pg_start_backup(opt->label, opt->fastcheckpoint, &starttli, labelfile, &tablespaces, tblspc_map_file, - opt->progress, opt->sendtblspcmapfile); + opt->progress, opt->sendtblspcmapfile, + opt->lsn); + refptr = opt->lsn; /* * Once do_pg_start_backup has been called, ensure that any failure causes @@ -1392,6 +1408,7 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf int segmentno = 0; char *segmentpath; bool verify_checksum = false; + char *filename; fp = AllocateFile(readfilename, "rb"); if (fp == NULL) @@ -1403,17 +1420,15 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf errmsg("could not open file \"%s\": %m", readfilename))); } + /* + * Get the filename (excluding path). As last_dir_separator() includes + * the last directory separator, we chop that off by incrementing the + * pointer. + */ + filename = last_dir_separator(readfilename) + 1; + if (!noverify_checksums && DataChecksumsEnabled()) { - char *filename; - - /* - * Get the filename (excluding path). As last_dir_separator() - * includes the last directory separator, we chop that off by - * incrementing the pointer. - */ - filename = last_dir_separator(readfilename) + 1; - if (is_checksummed_file(readfilename, filename)) { verify_checksum = true; @@ -1434,9 +1449,23 @@ sendFile(const char *readfilename, const char *tarfilename, struct stat *statbuf } } - /* Perform full backup */ - len = do_full_backup(readfilename, tarfilename, fp, statbuf, segmentno, - verify_checksum, &checksum_failures); + /* + * If incremental backup, see whether the filename is a relation filename + * or not. + */ + if (refptr && OidIsValid(dboid) && looks_like_rel_name(filename)) + { + /* Perform incremental backup */ + len = do_incremental_backup(readfilename, tarfilename, fp, + refptr, statbuf, segmentno, + verify_checksum, &checksum_failures); + } + else + { + /* Perform full backup */ + len = do_full_backup(readfilename, tarfilename, fp, statbuf, segmentno, + verify_checksum, &checksum_failures); + } /* If the file was truncated while we were sending it, pad it with zeros */ if (len < statbuf->st_size) @@ -1775,3 +1804,192 @@ do_full_backup(const char *readfilename, const char *tarfilename, FILE *fp, return len; } + +/* + * do_incremental_backup + * + * Perform incremental backup. + */ +static pgoff_t +do_incremental_backup(const char *readfilename, const char *tarfilename, + FILE *fp, XLogRecPtr refptr, struct stat *statbuf, + int segmentno, bool verify_checksum, + int *checksum_failures) +{ + char *buf; + off_t cnt; + pgoff_t len = 0; + BlockNumber blkno = 0; + int i; + bool sendwholefile = false; + + Assert(statbuf->st_size <= (RELSEG_SIZE * BLCKSZ)); + + buf = (char *) malloc(statbuf->st_size); + if (buf == NULL) + ereport(ERROR, + (errcode(ERRCODE_OUT_OF_MEMORY), + errmsg("out of memory"))); + + if ((cnt = fread(buf, 1, statbuf->st_size, fp)) > 0) + { + Bitmapset *mod_blocks = NULL; + int nmodblocks = 0; + + if (cnt % BLCKSZ != 0) + { + if (verify_checksum) + { + ereport(WARNING, + (errmsg("cannot verify checksum in file \"%s\", block " + "%d: read buffer size %d and page size %d " + "differ", + readfilename, blkno, (int) cnt, BLCKSZ))); + verify_checksum = false; + } + + ereport(WARNING, + (errmsg("file size (%d) not in multiple of page size (%d), sending whole file", + (int) cnt, BLCKSZ))); + + /* File size is not in multiple of BLCKSZ, send as is. */ + sendwholefile = true; + } + + /* + * Check each page LSN and see if it is modified after the given LSN or + * not. Create a bitmap of all such modified blocks and then decide + * whether we want to send a whole file or a partial file. Skip this + * check if we decided to send whole file already. + */ + if (!sendwholefile) + { + XLogRecPtr pglsn; + int nblocks = (cnt / BLCKSZ); + + for (i = 0; i < nblocks; i++) + { + char *page = buf + BLCKSZ * i; + + pglsn = PageGetLSN(page); + + if (pglsn >= refptr) + { + /* + * Verify checksum, if requested, for the modified blocks. + */ + if (verify_checksum) + verify_page_checksum(readfilename, fp, buf, cnt, i, + blkno, segmentno, + checksum_failures); + + mod_blocks = bms_add_member(mod_blocks, i); + } + + blkno++; + } + + nmodblocks = bms_num_members(mod_blocks); + + /* + * We need to send whole file if the modified block count is equal + * to or greater than the WHOLE_FILE_THRESHOLD. Check that. + */ + if (i > 0 && (nmodblocks / (double) i) >= WHOLE_FILE_THRESHOLD) + sendwholefile = true; + } + + /* + * If sendwholefile is true then we need to send the whole file as is. + * Otherwise send a partial file. + */ + if (sendwholefile) + { + _tarWriteHeader(tarfilename, NULL, statbuf, false); + + /* Send the chunk as a CopyData message */ + if (pq_putmessage('d', buf, cnt)) + ereport(ERROR, + (errmsg("base backup could not send data, aborting backup"))); + + len = cnt; + throttle(cnt); + } + else + { + int part_size = 0; + int part_header_size; + int blknum; + int blknocnt; + partial_file_header *pfh; + char *partialtarfilename = NULL; + + /* Create a partial file */ + + /* Calculate partial file size. */ + part_header_size = offsetof(partial_file_header, blocknumbers) + + (sizeof(uint32) * nmodblocks); + part_size = part_header_size + (BLCKSZ * nmodblocks); + + /* Add .partial to filename */ + partialtarfilename = (char *) palloc(strlen(tarfilename) + 9); + snprintf(partialtarfilename, strlen(tarfilename) + 9, "%s.partial", tarfilename); + + statbuf->st_size = part_size; + _tarWriteHeader(partialtarfilename, NULL, statbuf, false); + pfree(partialtarfilename); + + pfh = (partial_file_header *) palloc(part_header_size); + pfh->magic = INCREMENTAL_BACKUP_MAGIC; + pfh->nblocks = nmodblocks; + + blknum = -1; + blknocnt = 0; + while ((blknum = bms_next_member(mod_blocks, blknum)) >= 0) + { + pfh->blocknumbers[blknocnt] = blknum; + /* Calculate CRC for each block to be transferred. */ + blknocnt++; + } + + Assert(blknocnt == nmodblocks); + + /* Now calculate CRC for the header */ + INIT_CRC32C(pfh->checksum); + COMP_CRC32C(pfh->checksum, pfh, part_header_size); + + /* Send header */ + if (pq_putmessage('d', (char *) pfh, part_header_size)) + ereport(ERROR, + (errmsg("base backup could not send data, aborting backup"))); + throttle(part_header_size); + + /* Send data blocks */ + for (blknocnt = 0; blknocnt < nmodblocks; blknocnt++) + { + int offset = BLCKSZ * pfh->blocknumbers[blknocnt]; + + if (pq_putmessage('d', buf + offset, BLCKSZ)) + ereport(ERROR, + (errmsg("base backup could not send data, aborting backup"))); + throttle(BLCKSZ); + } + + Assert(blknocnt == nmodblocks); + + len = part_size; + pfree(pfh); + } + } + else + { + /* Send empty file as is */ + _tarWriteHeader(tarfilename, NULL, statbuf, false); + len = cnt; + } + + /* free buffer allocated */ + free(buf); + + return len; +} diff --git a/src/backend/storage/file/fd.c b/src/backend/storage/file/fd.c index a76112d..2990c52 100644 --- a/src/backend/storage/file/fd.c +++ b/src/backend/storage/file/fd.c @@ -3111,6 +3111,35 @@ looks_like_temp_rel_name(const char *name) return true; } +/* , or . */ +bool +looks_like_rel_name(const char *name) +{ + int pos; + + /* Look for a non-empty string of digits (that isn't too long). */ + for (pos = 0; isdigit((unsigned char) name[pos]); ++pos) + ; + if (pos == 0 || pos > OIDCHARS) + return false; + + if (name[pos] == '.') + { + int segchar; + + for (segchar = 1; isdigit((unsigned char) name[pos + segchar]); ++segchar) + ; + if (segchar <= 1) + return false; + pos += segchar; + } + + /* Now we should be at the end. */ + if (name[pos] != '\0') + return false; + return true; +} + /* * Issue fsync recursively on PGDATA and all its contents. diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index d519252..155385d 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -347,7 +347,8 @@ typedef enum SessionBackupState extern XLogRecPtr do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p, StringInfo labelfile, List **tablespaces, StringInfo tblspcmapfile, bool infotbssize, - bool needtblspcmapfile); + bool needtblspcmapfile, + XLogRecPtr ref_lsn); extern XLogRecPtr do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p); extern void do_pg_abort_backup(void); diff --git a/src/include/replication/basebackup.h b/src/include/replication/basebackup.h index 503a5b9..1b35b08 100644 --- a/src/include/replication/basebackup.h +++ b/src/include/replication/basebackup.h @@ -20,6 +20,9 @@ #define MAX_RATE_LOWER 32 #define MAX_RATE_UPPER 1048576 +/* magic number in incremental backup's .partial file */ +#define INCREMENTAL_BACKUP_MAGIC 0x494E4352 + typedef struct { @@ -29,6 +32,16 @@ typedef struct int64 size; } tablespaceinfo; +/* Definition of the partial file header */ +typedef struct +{ + uint32 magic; + pg_crc32c checksum; + uint32 nblocks; + uint32 blocknumbers[FLEXIBLE_ARRAY_MEMBER]; +} partial_file_header; + + extern void SendBaseBackup(BaseBackupCmd *cmd); extern int64 sendTablespace(char *path, bool sizeonly); diff --git a/src/include/storage/fd.h b/src/include/storage/fd.h index d2a8c52..d25a390 100644 --- a/src/include/storage/fd.h +++ b/src/include/storage/fd.h @@ -136,6 +136,7 @@ extern void AtEOSubXact_Files(bool isCommit, SubTransactionId mySubid, SubTransactionId parentSubid); extern void RemovePgTempFiles(void); extern bool looks_like_temp_rel_name(const char *name); +extern bool looks_like_rel_name(const char *name); extern int pg_fsync(int fd); extern int pg_fsync_no_writethrough(int fd); -- 1.8.3.1