From: Kern Sibbald Date: Fri, 24 Mar 2006 16:37:56 +0000 (+0000) Subject: 24Mar06 X-Git-Tag: Release-2.0.0~987 X-Git-Url: https://git.sur5r.net/?a=commitdiff_plain;h=a1317ffc39cb931e47cf6aa3b438b4e6d783ed0b;p=bacula%2Fbacula 24Mar06 - Create datestyle fix for PostgreSQL. Fixes bug #574. - Correct editing of JobId from int to int64 in fd_cmds.c - Eliminate FileSet name race with bash_spaces() and multiple threads by bashing in a local. - Fix error return from 'use storage' to print a correct error message rather than nothing. - Correct false re-read last block error message when two jobs are simultaneously writing at the end of a tape. - Simplify exit conditions in the reserve.c code to avoid possible non-release of reservation_lock(). - Shuffle lock order in reserve to avoid deadlock between reservation lock and device mutex. 21Mar06 - Initialize jcr mutex before first use. git-svn-id: https://bacula.svn.sourceforge.net/svnroot/bacula/trunk@2852 91ce42f0-d328-0410-95d8-f526ca767f89 --- diff --git a/bacula/ReleaseNotes b/bacula/ReleaseNotes index 1b9402964e..f83d2f3946 100644 --- a/bacula/ReleaseNotes +++ b/bacula/ReleaseNotes @@ -31,6 +31,11 @@ New features: - The wait command can now be made to wait for jobids. - New command line keywords are permitted in update volume. They are Inchanger=yes/no, slot=nn. +- Add two new console commands: enable job= and + disable job=. When a job is disabled, it will not + be started by the scheduler. If you disable a job and restart + Bacula or reload the .conf file, the job will be re-enabled. +- Add a new Job resource directive "enable = yes|no". Major bug fixes: - Fix race condition in multiple-drive autochangers where diff --git a/bacula/kernstodo b/bacula/kernstodo index 017b3b4fa9..8cf92e7613 100644 --- a/bacula/kernstodo +++ b/bacula/kernstodo @@ -16,6 +16,10 @@ Document: Priority: For 1.39: +- Fix re-read of last block to check if job has actually written + a block, and check if block was written by a different job + (i.e. 
multiple simultaneous jobs writing). +- JobStatus and Termination codes. - Some users claim that they must do two prune commands to get a Volume marked as purged. - Print warning message if LANG environment variable does not specify diff --git a/bacula/kes-1.38 b/bacula/kes-1.38 index 548e218a38..1223879a83 100644 --- a/bacula/kes-1.38 +++ b/bacula/kes-1.38 @@ -2,6 +2,31 @@ Kern Sibbald General: +24Mar06 +- Create datestyle fix for PostgreSQL. Fixes bug #574. +- Correct editing of JobId from int to int64 in fd_cmds.c +- Eliminate FileSet name race with bash_spaces() and multiple + threads by bashing in a local. +- Fix error return from 'use storage' to print a correct error + message rather than nothing. +- Correct false re-read last block error message when two jobs + are simultaneously writing at the end of a tape. +- Simplify exit conditions in the reserve.c code to avoid + possible non-release of reservation_lock(). +- Shuffle lock order in reserve to avoid deadlock between + reservation lock and device mutex. +- Add Thorsten's VSS timeout code to 1.38 branch. +21Mar06 +- Initialize jcr mutex before first use. Thanks to Thorsten for + tracking this down for me !!!! as it broke the Win32 build. +20Mar06 +- Integrate addition of line count limitation to bsmtp -l from + Sebastian Stark +17Mar06 +- Implement regex test program in tools directory. +- Attempt to fix time problem with bsmtp with foreign langs. +- Add strip_trailing_newline() submitted by user. + 16Mar06 - Fix bug #537 to allow arbitrary time to mount a volume for restore, if polling is turned on. diff --git a/bacula/kes-1.39 b/bacula/kes-1.39 index 5d0d8922e5..75959fe56f 100644 --- a/bacula/kes-1.39 +++ b/bacula/kes-1.39 @@ -2,6 +2,22 @@ Kern Sibbald General: +24Mar06 +- Create datestyle fix for PostgreSQL. Fixes bug #574. +- Correct editing of JobId from int to int64 in fd_cmds.c +- Eliminate FileSet name race with bash_spaces() and multiple + threads by bashing in a local. 
+- Fix error return from 'use storage' to print a correct error + message rather than nothing. +- Correct false re-read last block error message when two jobs + are simultaneously writing at the end of a tape. +- Simplify exit conditions in the reserve.c code to avoid + possible non-release of reservation_lock(). +- Shuffle lock order in reserve to avoid deadlock between + reservation lock and device mutex. +21Mar06 +- Initialize jcr mutex before first use. Thanks to Thorsten for + tracking this down for me !!!! as it broke the Win32 build. 20Mar06 - Integrate addition of line count limitation to bsmtp -l from Sebastian Stark diff --git a/bacula/src/cats/create_postgresql_database.in b/bacula/src/cats/create_postgresql_database.in index e40ef541c6..8ee437d85f 100644 --- a/bacula/src/cats/create_postgresql_database.in +++ b/bacula/src/cats/create_postgresql_database.in @@ -16,6 +16,7 @@ ENCODING="ENCODING 'SQL_ASCII'" if $bindir/psql -f - -d template1 $* <connected = true; V(mutex); return 1; diff --git a/bacula/src/dird/fd_cmds.c b/bacula/src/dird/fd_cmds.c index 8eef4c3e64..51ad085b07 100644 --- a/bacula/src/dird/fd_cmds.c +++ b/bacula/src/dird/fd_cmds.c @@ -33,7 +33,7 @@ /* Commands sent to File daemon */ static char filesetcmd[] = "fileset%s\n"; /* set full fileset */ -static char jobcmd[] = "JobId=%d Job=%s SDid=%u SDtime=%u Authorization=%s\n"; +static char jobcmd[] = "JobId=%s Job=%s SDid=%u SDtime=%u Authorization=%s\n"; /* Note, mtime_only is not used here -- implemented as file option */ static char levelcmd[] = "level = %s%s mtime_only=%d\n"; static char runbefore[] = "RunBeforeJob %s\n"; @@ -67,6 +67,7 @@ int connect_to_file_daemon(JCR *jcr, int retry_interval, int max_retry_time, int verbose) { BSOCK *fd; + char ed1[30]; if (!jcr->file_bsock) { fd = bnet_connect(jcr, retry_interval, max_retry_time, @@ -92,7 +93,7 @@ int connect_to_file_daemon(JCR *jcr, int retry_interval, int max_retry_time, /* * Now send JobId and authorization key */ - bnet_fsend(fd, 
jobcmd, jcr->JobId, jcr->Job, jcr->VolSessionId, + bnet_fsend(fd, jobcmd, edit_int64(jcr->JobId, ed1), jcr->Job, jcr->VolSessionId, jcr->VolSessionTime, jcr->sd_auth_key); if (strcmp(jcr->sd_auth_key, "dummy") != 0) { memset(jcr->sd_auth_key, 0, strlen(jcr->sd_auth_key)); diff --git a/bacula/src/dird/msgchan.c b/bacula/src/dird/msgchan.c index 524c2e520b..5e0124975b 100644 --- a/bacula/src/dird/msgchan.c +++ b/bacula/src/dird/msgchan.c @@ -36,7 +36,7 @@ static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; /* Commands sent to Storage daemon */ -static char jobcmd[] = "JobId=%d job=%s job_name=%s client_name=%s " +static char jobcmd[] = "JobId=%s job=%s job_name=%s client_name=%s " "type=%d level=%d FileSet=%s NoAttr=%d SpoolAttr=%d FileSetMD5=%s " "SpoolData=%d WritePartAfterJob=%d PreferMountedVols=%d\n"; static char use_storage[] = "use storage=%s media_type=%s pool_name=%s " @@ -51,7 +51,7 @@ static char OK_device[] = "3000 OK use device device=%s\n"; /* Storage Daemon requests */ static char Job_start[] = "3010 Job %127s start\n"; static char Job_end[] = - "3099 Job %127s end JobStatus=%d JobFiles=%d JobBytes=%lld\n"; + "3099 Job %127s end JobStatus=%d JobFiles=%d JobBytes=%" lld "\n"; /* Forward referenced functions */ extern "C" void *msg_thread(void *arg); @@ -128,16 +128,21 @@ bool start_storage_daemon_job(JCR *jcr, alist *rstore, alist *wstore) BSOCK *sd; char auth_key[100]; POOL_MEM store_name, device_name, pool_name, pool_type, media_type; + POOL_MEM job_name, client_name, fileset_name; int copy = 0; int stripe = 0; + char ed1[30]; sd = jcr->store_bsock; /* * Now send JobId and permissions, and get back the authorization key. 
*/ - bash_spaces(jcr->job->hdr.name); - bash_spaces(jcr->client->hdr.name); - bash_spaces(jcr->fileset->hdr.name); + pm_strcpy(job_name, jcr->job->hdr.name); + bash_spaces(job_name); + pm_strcpy(client_name, jcr->client->hdr.name); + bash_spaces(client_name); + pm_strcpy(fileset_name, jcr->fileset->hdr.name); + bash_spaces(fileset_name); if (jcr->fileset->MD5[0] == 0) { bstrncpy(jcr->fileset->MD5, "**Dummy**", sizeof(jcr->fileset->MD5)); } @@ -151,15 +156,13 @@ bool start_storage_daemon_job(JCR *jcr, alist *rstore, alist *wstore) while (bnet_recv(sd) >= 0) { } } - bnet_fsend(sd, jobcmd, jcr->JobId, jcr->Job, jcr->job->hdr.name, - jcr->client->hdr.name, jcr->JobType, jcr->JobLevel, - jcr->fileset->hdr.name, !jcr->pool->catalog_files, + bnet_fsend(sd, jobcmd, edit_int64(jcr->JobId, ed1), jcr->Job, + job_name.c_str(), client_name.c_str(), + jcr->JobType, jcr->JobLevel, + fileset_name.c_str(), !jcr->pool->catalog_files, jcr->job->SpoolAttributes, jcr->fileset->MD5, jcr->spool_data, jcr->write_part_after_job, jcr->job->PreferMountedVolumes); Dmsg1(100, ">stored: %s\n", sd->msg); - unbash_spaces(jcr->job->hdr.name); - unbash_spaces(jcr->client->hdr.name); - unbash_spaces(jcr->fileset->hdr.name); if (bget_dirmsg(sd) > 0) { Dmsg1(100, "msg); if (sscanf(sd->msg, OKjob, &jcr->VolSessionId, @@ -217,11 +220,6 @@ bool start_storage_daemon_job(JCR *jcr, alist *rstore, alist *wstore) /* ****FIXME**** save actual device name */ ok = sscanf(sd->msg, OK_device, device_name.c_str()) == 1; } else { - POOL_MEM err_msg; - pm_strcpy(err_msg, sd->msg); /* save message */ - Jmsg(jcr, M_FATAL, 0, _("\n" - " Storage daemon didn't accept Device \"%s\" because:\n %s"), - device_name.c_str(), err_msg.c_str()/* sd->msg */); ok = false; } } @@ -252,12 +250,20 @@ bool start_storage_daemon_job(JCR *jcr, alist *rstore, alist *wstore) /* ****FIXME**** save actual device name */ ok = sscanf(sd->msg, OK_device, device_name.c_str()) == 1; } else { - POOL_MEM err_msg; + ok = false; + } + } + if (!ok) { + 
POOL_MEM err_msg; + if (sd->msg[0]) { pm_strcpy(err_msg, sd->msg); /* save message */ Jmsg(jcr, M_FATAL, 0, _("\n" - " Storage daemon didn't accept Device \"%s\" because:\n %s"), - device_name.c_str(), err_msg.c_str()/* sd->msg */); - ok = false; + " Storage daemon didn't accept Device \"%s\" because:\n %s"), + device_name.c_str(), err_msg.c_str()/* sd->msg */); + } else { + Jmsg(jcr, M_FATAL, 0, _("\n" + " Storage daemon didn't accept Device \"%s\" command.\n"), + device_name.c_str()); } } return ok; diff --git a/bacula/src/filed/bacula-fd.conf.in b/bacula/src/filed/bacula-fd.conf.in index dd71f4c660..1ca546d4bb 100644 --- a/bacula/src/filed/bacula-fd.conf.in +++ b/bacula/src/filed/bacula-fd.conf.in @@ -39,5 +39,5 @@ FileDaemon { # this is me # Send all messages except skipped files back to Director Messages { Name = Standard - director = @hostname@-dir = all, !skipped + director = @hostname@-dir = all, !skipped, !restored } diff --git a/bacula/src/lib/btimers.c b/bacula/src/lib/btimers.c index faa77a56e4..143dc46369 100644 --- a/bacula/src/lib/btimers.c +++ b/bacula/src/lib/btimers.c @@ -5,7 +5,7 @@ * */ /* - Copyright (C) 2004-2005 Kern Sibbald + Copyright (C) 2004-2006 Kern Sibbald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License diff --git a/bacula/src/stored/block.c b/bacula/src/stored/block.c index d1d7b2669d..5c023da781 100644 --- a/bacula/src/stored/block.c +++ b/bacula/src/stored/block.c @@ -649,12 +649,17 @@ static void reread_last_block(DCR *dcr) Jmsg(jcr, M_ERROR, 0, _("Re-read last block at EOT failed. ERR=%s"), dev->errmsg); } else { - if (lblock->BlockNumber+1 == block->BlockNumber) { - Jmsg(jcr, M_INFO, 0, _("Re-read of last block succeeded.\n")); - } else { + /* + * If we wrote block and the block numbers don't agree + * we have a possible problem. 
+ */ + if (lblock->VolSessionId == block->VolSessionId && + lblock->BlockNumber+1 != block->BlockNumber) { Jmsg(jcr, M_ERROR, 0, _( -"Re-read of last block failed. Last block=%u Current block=%u.\n"), +"Re-read of last block OK, but block numbers differ. Last block=%u Current block=%u.\n"), lblock->BlockNumber, block->BlockNumber); + } else { + Jmsg(jcr, M_INFO, 0, _("Re-read of last block succeeded.\n")); } } free_block(lblock); diff --git a/bacula/src/stored/job.c b/bacula/src/stored/job.c index 0d188eb593..b9fa039c6f 100644 --- a/bacula/src/stored/job.c +++ b/bacula/src/stored/job.c @@ -41,7 +41,7 @@ static char jobcmd[] = "JobId=%d job=%127s job_name=%127s client_name=%127s " /* Responses sent to Director daemon */ static char OKjob[] = "3000 OK Job SDid=%u SDtime=%u Authorization=%s\n"; -static char BAD_job[] = "3915 Bad Job command: %s\n"; +static char BAD_job[] = "3915 Bad Job command. stat=%d CMD: %s\n"; //static char OK_query[] = "3001 OK query\n"; //static char NO_query[] = "3918 Query failed\n"; //static char BAD_query[] = "3917 Bad query command: %s\n"; @@ -64,22 +64,22 @@ bool job_cmd(JCR *jcr) POOL_MEM job_name, client_name, job, fileset_name, fileset_md5; int JobType, level, spool_attributes, no_attributes, spool_data; int write_part_after_job, PreferMountedVols; - + int stat; JCR *ojcr; /* * Get JobId and permissions from Director */ Dmsg1(100, "msg); - if (sscanf(dir->msg, jobcmd, &JobId, job.c_str(), job_name.c_str(), + stat = sscanf(dir->msg, jobcmd, &JobId, job.c_str(), job_name.c_str(), client_name.c_str(), &JobType, &level, fileset_name.c_str(), &no_attributes, &spool_attributes, fileset_md5.c_str(), &spool_data, - &write_part_after_job, &PreferMountedVols) != 13) { + &write_part_after_job, &PreferMountedVols); + if (stat != 13) { pm_strcpy(jcr->errmsg, dir->msg); - bnet_fsend(dir, BAD_job, jcr->errmsg); + bnet_fsend(dir, BAD_job, stat, jcr->errmsg); Dmsg1(100, ">dird: %s", dir->msg); - Emsg1(M_FATAL, 0, _("Bad Job Command from Director: 
%s\n"), jcr->errmsg); set_jcr_job_status(jcr, JS_ErrorTerminated); return false; } diff --git a/bacula/src/stored/reserve.c b/bacula/src/stored/reserve.c index 41c5797d4b..1859001811 100644 --- a/bacula/src/stored/reserve.c +++ b/bacula/src/stored/reserve.c @@ -409,9 +409,10 @@ static bool use_storage_cmd(JCR *jcr) */ if (ok) { bool first = true; /* print wait message once */ + bool fail = false; rctx.notify_dir = true; - for ( ; !job_canceled(jcr); ) { - lock_reservations(); /* only one thread at a time */ + lock_reservations(); + for ( ; !fail && !job_canceled(jcr); ) { while ((msg = (char *)msgs->pop())) { free(msg); } @@ -473,48 +474,41 @@ static bool use_storage_cmd(JCR *jcr) if ((ok = find_suitable_device_for_job(jcr, rctx))) { break; } - /* Unlock before possible wait */ + /* Keep reservations locked *except* during wait_for_device() */ unlock_reservations(); if (!rctx.suitable_device || !wait_for_device(jcr, first)) { Dmsg0(100, "Fail. !suitable_device || !wait_for_device\n"); - break; /* Get out, failure ... */ + fail = true; } + lock_reservations(); first = false; bnet_sig(dir, BNET_HEARTBEAT); /* Inform Dir that we are alive */ } - /* Note if !ok then search_lock is already cleared */ - if (ok) { - unlock_reservations(); - goto all_done; - } - - /* - * If we get here, there are no suitable devices available, which - * means nothing configured. If a device is suitable but busy - * with another Volume, we will not come here. - */ - if (verbose) { + unlock_reservations(); + if (!ok) { + /* + * If we get here, there are no suitable devices available, which + * means nothing configured. If a device is suitable but busy + * with another Volume, we will not come here. 
+ */ unbash_spaces(dir->msg); pm_strcpy(jcr->errmsg, dir->msg); Jmsg(jcr, M_INFO, 0, _("Failed command: %s\n"), jcr->errmsg); - } - Jmsg(jcr, M_FATAL, 0, _("\n" - " Device \"%s\" with MediaType \"%s\" requested by DIR not found in SD Device resources.\n"), - dev_name.c_str(), media_type.c_str()); - bnet_fsend(dir, NO_device, dev_name.c_str()); + Jmsg(jcr, M_FATAL, 0, _("\n" + " Device \"%s\" with MediaType \"%s\" requested by DIR not found in SD Device resources.\n"), + dev_name.c_str(), media_type.c_str()); + bnet_fsend(dir, NO_device, dev_name.c_str()); - Dmsg1(100, ">dird: %s", dir->msg); + Dmsg1(100, ">dird: %s", dir->msg); + } } else { unbash_spaces(dir->msg); pm_strcpy(jcr->errmsg, dir->msg); - if (verbose) { - Jmsg(jcr, M_INFO, 0, _("Failed command: %s\n"), jcr->errmsg); - } + Jmsg(jcr, M_FATAL, 0, _("Failed command: %s\n"), jcr->errmsg); bnet_fsend(dir, BAD_use, jcr->errmsg); Dmsg1(100, ">dird: %s", dir->msg); } -all_done: release_msgs(jcr); return ok; } @@ -748,7 +742,10 @@ static bool reserve_device_for_read(DCR *dcr) ASSERT(dcr); + /* Get locks in correct order */ + unlock_reservations(); P(dev->mutex); + lock_reservations(); if (is_device_unmounted(dev)) { Dmsg1(200, "Device %s is BLOCKED due to user unmount.\n", dev->print_name()); diff --git a/bacula/src/version.h b/bacula/src/version.h index 69c4634952..30a5341135 100644 --- a/bacula/src/version.h +++ b/bacula/src/version.h @@ -4,8 +4,8 @@ #undef VERSION #define VERSION "1.39.6" -#define BDATE "20 March 2006" -#define LSMDATE "20Mar06" +#define BDATE "24 March 2006" +#define LSMDATE "24Mar06" /* Debug flags */ #undef DEBUG