From 7ed1a1ed81f2ba02f448847e6aa7618cfb1d4cbc Mon Sep 17 00:00:00 2001 From: Eric Bollengier Date: Mon, 24 Aug 2009 18:09:19 +0200 Subject: [PATCH] update basejob code to use checksum during accurate check --- bacula/src/cats/sql_create.c | 3 +- bacula/src/cats/sql_get.c | 11 +- bacula/src/dird/backup.c | 19 ++- bacula/src/filed/accurate.c | 143 +++++++++++++++----- bacula/src/findlib/find.c | 2 +- regress/scripts/bacula-dir.conf.accurate.in | 4 + regress/tests/base-job-test | 7 +- 7 files changed, 139 insertions(+), 50 deletions(-) diff --git a/bacula/src/cats/sql_create.c b/bacula/src/cats/sql_create.c index e64460641e..14d1788481 100644 --- a/bacula/src/cats/sql_create.c +++ b/bacula/src/cats/sql_create.c @@ -1258,7 +1258,8 @@ bool db_create_base_file_list(JCR *jcr, B_DB *mdb, char *jobids) "CREATE TEMPORARY TABLE new_basefile%lld AS ( " //"CREATE TABLE new_basefile%lld AS ( " "SELECT Path.Path AS Path, Filename.Name AS Name, File.FileIndex AS FileIndex," - "File.JobId AS JobId, File.LStat AS LStat, File.FileId AS FileId " + "File.JobId AS JobId, File.LStat AS LStat, File.FileId AS FileId, " + "File.MD5 AS MD5 " "FROM ( " "SELECT max(FileId) as FileId, PathId, FilenameId " "FROM (SELECT FileId, PathId, FilenameId FROM File WHERE JobId IN (%s)) AS F " diff --git a/bacula/src/cats/sql_get.c b/bacula/src/cats/sql_get.c index 7e5901c8a7..2d64e90316 100644 --- a/bacula/src/cats/sql_get.c +++ b/bacula/src/cats/sql_get.c @@ -1044,10 +1044,12 @@ bool db_get_media_record(JCR *jcr, B_DB *mdb, MEDIA_DBR *mr) } /* - * Find the last "accurate" backup state (that can take deleted files in account) + * Find the last "accurate" backup state (that can take deleted files in + * account) * 1) Get all files with jobid in list (F subquery) * Get all files in BaseFiles with jobid in list - * 2) Take only the last version of each file (Temp subquery) => accurate list is ok + * 2) Take only the last version of each file (Temp subquery) => accurate list + * is ok * 3) Join the result to 
file table to get fileindex, jobid and lstat information * * TODO: See if we can do the SORT only if needed (as an argument) @@ -1066,7 +1068,8 @@ bool db_get_file_list(JCR *jcr, B_DB *mdb, char *jobids, #define new_db_get_file_list #ifdef new_db_get_file_list Mmsg(buf, - "SELECT Path.Path, Filename.Name, File.FileIndex, File.JobId, File.LStat " + "SELECT Path.Path, Filename.Name, File.FileIndex, File.JobId, " + "File.LStat, File.MD5 " "FROM ( " "SELECT max(FileId) as FileId, PathId, FilenameId " "FROM (SELECT FileId, PathId, FilenameId FROM File WHERE JobId IN (%s) " @@ -1214,7 +1217,7 @@ bool db_get_base_file_list(JCR *jcr, B_DB *mdb, POOL_MEM buf(PM_MESSAGE); Mmsg(buf, - "SELECT Path, Name, FileIndex, JobId, LStat " + "SELECT Path, Name, FileIndex, JobId, LStat, MD5 " "FROM new_basefile%lld ORDER BY JobId, FileIndex ASC", (uint64_t) jcr->JobId); diff --git a/bacula/src/dird/backup.c b/bacula/src/dird/backup.c index bbbb4460e0..3fa560aa6a 100644 --- a/bacula/src/dird/backup.c +++ b/bacula/src/dird/backup.c @@ -136,7 +136,7 @@ static bool get_base_jobids(JCR *jcr, POOLMEM *jobids) } /* - * Foreach files in currrent list, send "/path/fname\0LStat" to FD + * For each file in the current list, send "/path/fname\0LStat\0MD5" to FD */ static int accurate_list_handler(void *ctx, int num_fields, char **row) { @@ -146,8 +146,17 @@ static int accurate_list_handler(void *ctx, int num_fields, char **row) return 1; } - if (row[2] > 0) { /* discard when file_index == 0 */ - jcr->file_bsock->fsend("%s%s%c%s", row[0], row[1], 0, row[4]); + if (row[2] == 0) { /* discard when file_index == 0 */ + return 0; + } + + /* sending with checksum */ + if (num_fields == 6 && row[5][0] && row[5][1]) { /* skip checksum = '0' */ + jcr->file_bsock->fsend("%s%s%c%s%c%s", + row[0], row[1], 0, row[4], 0, row[5]); + } else { + jcr->file_bsock->fsend("%s%s%c%s", + row[0], row[1], 0, row[4]); } return 0; } @@ -155,8 +164,8 @@ static int accurate_list_handler(void *ctx, int num_fields, char **row) /* * 
Send current file list to FD * DIR -> FD : accurate files=xxxx - * DIR -> FD : /path/to/file\0Lstat - * DIR -> FD : /path/to/dir/\0Lstat + * DIR -> FD : /path/to/file\0Lstat\0MD5 + * DIR -> FD : /path/to/dir/\0Lstat\0MD5 * ... * DIR -> FD : EOD */ diff --git a/bacula/src/filed/accurate.c b/bacula/src/filed/accurate.c index 1541fa889d..6def5178c8 100644 --- a/bacula/src/filed/accurate.c +++ b/bacula/src/filed/accurate.c @@ -39,6 +39,7 @@ typedef struct PrivateCurFile { hlink link; char *fname; char *lstat; + char *chksum; bool seen; } CurFile; @@ -197,7 +198,8 @@ bool accurate_finish(JCR *jcr) return ret; } -static bool accurate_add_file(JCR *jcr, char *fname, char *lstat) +static bool accurate_add_file(JCR *jcr, uint32_t len, + char *fname, char *lstat, char *chksum) { bool ret = true; CurFile elt; @@ -205,15 +207,21 @@ static bool accurate_add_file(JCR *jcr, char *fname, char *lstat) CurFile *item; /* we store CurFile, fname and ctime/mtime in the same chunk */ - item = (CurFile *)jcr->file_list->hash_malloc(sizeof(CurFile)+strlen(fname)+strlen(lstat)+2); + item = (CurFile *)jcr->file_list->hash_malloc(sizeof(CurFile)+len+3); memcpy(item, &elt, sizeof(CurFile)); + item->fname = (char *)item+sizeof(CurFile); strcpy(item->fname, fname); + item->lstat = item->fname+strlen(item->fname)+1; strcpy(item->lstat, lstat); + + item->chksum = item->lstat+strlen(item->lstat)+1; + strcpy(item->chksum, chksum); + jcr->file_list->insert(item->fname, item); - Dmsg2(dbglvl, "add fname=<%s> lstat=%s\n", fname, lstat); + Dmsg3(dbglvl, "add fname=<%s> lstat=%s chksum=%s\n", fname, lstat, chksum); return ret; } @@ -228,6 +236,9 @@ static bool accurate_add_file(JCR *jcr, char *fname, char *lstat) */ bool accurate_check_file(JCR *jcr, FF_PKT *ff_pkt) { + int digest_stream = STREAM_NONE; + DIGEST *digest = NULL; + struct stat statc; int32_t LinkFIc; bool stat = false; @@ -259,12 +270,11 @@ bool accurate_check_file(JCR *jcr, FF_PKT *ff_pkt) decode_stat(elt.lstat, &statc, &LinkFIc); /* 
decode catalog stat */ -//#if 0 /* * Loop over options supplied by user and verify the * fields he requests. */ - for (char *p=ff_pkt->AccurateOpts; *p; p++) { + for (char *p=ff_pkt->AccurateOpts; !stat && *p; p++) { char ed1[30], ed2[30]; switch (*p) { case 'i': /* compare INODEs */ @@ -323,7 +333,7 @@ bool accurate_check_file(JCR *jcr, FF_PKT *ff_pkt) stat = true; } break; - case 'm': + case 'm': /* modification time */ if (statc.st_mtime != ff_pkt->statp.st_mtime) { Dmsg1(dbglvl-1, "%s st_mtime differs\n", fname); stat = true; @@ -331,7 +341,7 @@ bool accurate_check_file(JCR *jcr, FF_PKT *ff_pkt) break; case 'c': /* ctime */ if (statc.st_ctime != ff_pkt->statp.st_ctime) { - Dmsg1(dbglvl-1, " st_ctime differs\n", fname); + Dmsg1(dbglvl-1, "%s st_ctime differs\n", fname); stat = true; } break; @@ -344,39 +354,85 @@ bool accurate_check_file(JCR *jcr, FF_PKT *ff_pkt) stat = true; } break; + + /* TODO: cleanup and factorise this function with verify.c */ case '5': /* compare MD5 */ - break; case '1': /* compare SHA1 */ + /* + * The remainder of the function is all about getting the checksum. + * First we initialise, then we read files, other streams and Finder Info. + */ + if (!stat && *elt.chksum && ff_pkt->type != FT_LNKSAVED && + (S_ISREG(ff_pkt->statp.st_mode) && + ff_pkt->flags & (FO_MD5|FO_SHA1|FO_SHA256|FO_SHA512))) + { + /* + * Create our digest context. If this fails, the digest will be set to NULL + * and not used. 
+ */ + if (ff_pkt->flags & FO_MD5) { + digest = crypto_digest_new(jcr, CRYPTO_DIGEST_MD5); + digest_stream = STREAM_MD5_DIGEST; + + } else if (ff_pkt->flags & FO_SHA1) { + digest = crypto_digest_new(jcr, CRYPTO_DIGEST_SHA1); + digest_stream = STREAM_SHA1_DIGEST; + + } else if (ff_pkt->flags & FO_SHA256) { + digest = crypto_digest_new(jcr, CRYPTO_DIGEST_SHA256); + digest_stream = STREAM_SHA256_DIGEST; + + } else if (ff_pkt->flags & FO_SHA512) { + digest = crypto_digest_new(jcr, CRYPTO_DIGEST_SHA512); + digest_stream = STREAM_SHA512_DIGEST; + } + + /* Did digest initialization fail? */ + if (digest_stream != STREAM_NONE && digest == NULL) { + Jmsg(jcr, M_WARNING, 0, _("%s digest initialization failed\n"), + stream_to_ascii(digest_stream)); + } + + /* compute MD5 or SHA1 hash */ + if (digest) { + char md[CRYPTO_DIGEST_MAX_SIZE]; + uint32_t size; + + size = sizeof(md); + + if (digest_file(jcr, ff_pkt, digest) != 0) { + jcr->JobErrors++; + + } else if (crypto_digest_finalize(digest, (uint8_t *)md, &size)) { + char *digest_buf; + const char *digest_name; + + digest_buf = (char *)malloc(BASE64_SIZE(size)); + digest_name = crypto_digest_name(digest); + + bin_to_base64(digest_buf, BASE64_SIZE(size), md, size, true); + + if (strcmp(digest_buf, elt.chksum)) { + Dmsg3(dbglvl-1, "%s chksum diff. 
Cat: %s File: %s\n", + fname, + elt.chksum, + digest_buf); + stat = true; + } + + free(digest_buf); + } + crypto_digest_free(digest); + } + } + break; case ':': case 'C': default: break; - } - } -//#endif -#if 0 - /* - * We check only mtime/ctime like with the normal - * incremental/differential mode - */ - if (statc.st_mtime != ff_pkt->statp.st_mtime) { -// Jmsg(jcr, M_SAVED, 0, _("%s st_mtime differs\n"), fname); - Dmsg3(dbglvl, "%s st_mtime differs (%lld!=%lld)\n", - fname, statc.st_mtime, (utime_t)ff_pkt->statp.st_mtime); - stat = true; - } else if (!(ff_pkt->flags & FO_MTIMEONLY) - && (statc.st_ctime != ff_pkt->statp.st_ctime)) { -// Jmsg(jcr, M_SAVED, 0, _("%s st_ctime differs\n"), fname); - Dmsg1(dbglvl, "%s st_ctime differs\n", fname); - stat = true; - - } else if (statc.st_size != ff_pkt->statp.st_size) { -// Jmsg(jcr, M_SAVED, 0, _("%s st_size differs\n"), fname); - Dmsg1(dbglvl, "%s st_size differs\n", fname); - stat = true; + } } -#endif /* In Incr/Diff accurate mode, we mark all files as seen * When in Full+Base mode, we mark only if the file match exactly @@ -402,7 +458,7 @@ bail_out: int accurate_cmd(JCR *jcr) { BSOCK *dir = jcr->dir_bsock; - int len; + int lstat_pos, chksum_pos; int32_t nb; if (job_canceled(jcr)) { @@ -419,13 +475,26 @@ int accurate_cmd(JCR *jcr) /* * buffer = sizeof(CurFile) + dirmsg - * dirmsg = fname + \0 + lstat + * dirmsg = fname + \0 + lstat + \0 + checksum + \0 */ /* get current files */ while (dir->recv() >= 0) { - len = strlen(dir->msg) + 1; - if (len < dir->msglen) { - accurate_add_file(jcr, dir->msg, dir->msg + len); + lstat_pos = strlen(dir->msg) + 1; + if (lstat_pos < dir->msglen) { + chksum_pos = lstat_pos + strlen(dir->msg + lstat_pos) + 1; + + Dmsg3(dbglvl, "len=%i lstat_pos=%i chksum_pos=%i\n", + (uint32_t) dir->msglen, + (uint32_t) lstat_pos, (uint32_t) chksum_pos); + + if (chksum_pos >= dir->msglen) { + chksum_pos = lstat_pos - 1; /* no checksum, point to the last \0 */ + } + + accurate_add_file(jcr, dir->msglen, 
+ dir->msg, /* Path */ + dir->msg + lstat_pos, /* LStat */ + dir->msg + chksum_pos); /* CheckSum */ } } diff --git a/bacula/src/findlib/find.c b/bacula/src/findlib/find.c index 0b6059538d..0a4599e43a 100644 --- a/bacula/src/findlib/find.c +++ b/bacula/src/findlib/find.c @@ -175,7 +175,7 @@ find_files(JCR *jcr, FF_PKT *ff, int file_save(JCR *jcr, FF_PKT *ff_pkt, bool to ff->flags = 0; ff->VerifyOpts[0] = 'V'; ff->VerifyOpts[1] = 0; - strcpy(ff->AccurateOpts, "C:mc"); /* mtime+ctime by default */ + strcpy(ff->AccurateOpts, "C:mcs"); /* mtime+ctime+size by default */ for (i=0; i<fileset->include_list.size(); i++) { findINCEXE *incexe = (findINCEXE *)fileset->include_list.get(i); fileset->incexe = incexe; diff --git a/regress/scripts/bacula-dir.conf.accurate.in b/regress/scripts/bacula-dir.conf.accurate.in index 2d7cfdecb1..f53fe83934 100644 --- a/regress/scripts/bacula-dir.conf.accurate.in +++ b/regress/scripts/bacula-dir.conf.accurate.in @@ -41,7 +41,9 @@ FileSet { Name = FS_TESTJOB Include { Options { + Signature = MD5 Verify = mc + Accurate = mcs5 } File=<@tmpdir@/file-list } @@ -51,6 +53,7 @@ FileSet { Name = FS_TESTJOB2 Include { Options { + Signature = MD5 Verify = mc strippath=1 } @@ -62,6 +65,7 @@ FileSet { Name = FS_TESTJOB_ADVANCE Include { Options { + Signature = MD5 Verify = mcpgu } File=<@tmpdir@/file-list diff --git a/regress/tests/base-job-test b/regress/tests/base-job-test index 37ea770451..1882e66107 100755 --- a/regress/tests/base-job-test +++ b/regress/tests/base-job-test @@ -10,8 +10,11 @@ JobName=backup $rscripts/cleanup copy_test_confs -sed 's/backup_advance/base_backup/' $rscripts/bacula-dir.conf.accurate > $tmp/1 -sed 's/Name = backup/Name = backup; Base = base_backup, backup/' $tmp/1 > $conf/bacula-dir.conf +echo 's/backup_advance/base_backup/' > $tmp/s +echo 's/Name = backup/Name = backup; Base = base_backup, backup/' >> $tmp/s +sed -f $tmp/s $rscripts/bacula-dir.conf.accurate > $conf/bacula-dir.conf +rm -f $tmp/s + sed s/all,/all,saved,/ 
$conf/bacula-fd.conf > tmp/1 cp tmp/1 $conf/bacula-fd.conf -- 2.39.5