From e8a87fd0a4936b7cc1e8e182b644fb207fcb9bad Mon Sep 17 00:00:00 2001 From: Kern Sibbald Date: Fri, 14 Sep 2007 09:49:06 +0000 Subject: [PATCH] Commit backports from 2.3.x git-svn-id: https://bacula.svn.sourceforge.net/svnroot/bacula/branches/Branch-2.2@5552 91ce42f0-d328-0410-95d8-f526ca767f89 --- bacula/ChangeLog | 49 +++++++++++++++++++++++++++++++++++++ bacula/ReleaseNotes | 30 ++++++++++++++++++++++- bacula/src/dird/backup.c | 9 ++++--- bacula/src/dird/dird.c | 18 ++++++++------ bacula/src/dird/job.c | 40 ++++++++++++++++++++++++++++++ bacula/src/dird/protos.h | 1 + bacula/src/dird/restore.c | 4 +++ bacula/src/dird/ua_cmds.c | 2 +- bacula/src/dird/verify.c | 29 +++++++++++++--------- bacula/src/filed/filed.c | 13 ++++++++-- bacula/src/lib/tls.c | 4 +-- bacula/src/stored/acquire.c | 9 ++++++- bacula/src/stored/dircmd.c | 6 ++--- bacula/src/stored/mount.c | 4 ++- bacula/src/stored/stored.c | 3 ++- bacula/technotes-2.1 | 29 ++++++++++++++++++++++ 16 files changed, 215 insertions(+), 35 deletions(-) diff --git a/bacula/ChangeLog b/bacula/ChangeLog index 48a2661c10..09bf99e7bc 100644 --- a/bacula/ChangeLog +++ b/bacula/ChangeLog @@ -1,6 +1,55 @@ Technical notes on version 2.2.x General: +Release Version 2.2.4 +14Sep07 +kes Increase size of name string when FD making connection to SD. + May fix bug #953. +13Sep07 +kes Add code to try to fix bug #908. +kes Add waits to multiple exit detection code to try to force pid + file to always be deleted. +kes Restore good dev.tar.gz to rescue set appropriate binary property. + This fixes bug #950. +kes Fix seg fault in error exit of acquire_for_read after unsuccessfully + trying to switch drives by checking for blocking before unblocking. + Fixes bug #906. +kes Cancel storage daemon in all cases where FD reports error. This + should fix virtually all cases of bug #920. +12Sep07 +kes Fix error message that was clobbered when Dir tells SD it does not + have write permission on Volume. 
This should fix a minor point + in bug #942, but not the main problem. +kes Add code to cancel job in SD if FD connection fails. This should + fix bug #920. +kes Add code in FD exit to prevent loops and a crash on FreeBSD. +kes Fix migration code to get correct Volume name with multiple volumes + by skipping |. Fixes bug #936. +kes Implement patch supplied by Landon to fix bug #944 where using + TLS with bconsole uses 99+% of the CPU. +kes Note, you need GTK >= 2.10 to be able to link the Tray Monitor + program. +kes Move patches into patches directory. +11Sep07 +ebl Fix bug #946 about "bacula-dir -t" which doesn't work + as expected. +09Sep07 +ebl Using "m" in bconsole will show messages like before, + and not memory usage. + +Release Version 2.2.3 +kes Note, you need GTK >= 2.10 to be able to link the Tray Monitor + program. +09Sep07 +kes Fix bug #935, and probably also bug #903 where files were not + restored. MediaId was not properly set in JobMedia record after + a Volume change. +07Sep07 +kes Add ./configure search in qwt-qt4 for qwt package +kes Apply Martin Simmons patch that should turn off the new API usage + when batch insert is turned off allowing building on older + PostgreSQLs. + Release Version 2.2.2 04Sep07 ebl Detect if new PosgreSQL batch insert API is present. diff --git a/bacula/ReleaseNotes b/bacula/ReleaseNotes index aa13e14907..b80d2a540a 100644 --- a/bacula/ReleaseNotes +++ b/bacula/ReleaseNotes @@ -1,5 +1,5 @@ - Release Notes for Bacula 2.2.3 + Release Notes for Bacula 2.2.4 Bacula code: Total files = 520 Total lines = 195,550 (*.h *.c *.in) 82 new files, 41,221 new lines of code, 208,380 lines of change from 2.0.3 @@ -10,6 +10,34 @@ use some of the new features that affect the FD. In other words, you should not have to upgrade all your File daemons when you upgrade. There is no database upgrade needed from version 2.0.x to 2.2.0. 
+Version 2.2.4 is a minor bug fix release to version 2.2.3 +- Possible fix for authorization problems bug #953. +- Possible fix for bug #908. +- Add waits to multiple exit detection code to try to force pid + file to always be deleted. +- Restore good dev.tar.gz to rescue set appropriate binary property. + This fixes bug #950. +- Fix seg fault in error exit of acquire_for_read after unsuccessfully + trying to switch drives by checking for blocking before unblocking. + Fixes bug #906. +- Cancel storage daemon in all cases where FD reports error. This + should fix virtually all cases of bug #920 and will ensure that Devices + are released as soon as possible. +- Fix error message that was clobbered when Dir tells SD it does not + have write permission on Volume. This should fix a minor point + in bug #942, but not the main problem. +- Fix migration code to get correct Volume name with multiple volumes + by skipping |. Fixes bug #936. +- Implement patch supplied by Landon to fix bug #944 where using + TLS with bconsole uses 99+% of the CPU. +- Fix bug #946 about "bacula-dir -t" which doesn't work + as expected. +- Using "m" in bconsole will show messages as in prior versions + and not memory usage. + +- Note, you need GTK >= 2.10 to be able to link the Tray Monitor + program. + Version 2.2.3 is a critical bug fix release to version 2.2.2 - Fix bug #935, and possibly also bug #903 where files were not restored. MediaId was not properly set in JobMedia record after diff --git a/bacula/src/dird/backup.c b/bacula/src/dird/backup.c index a6a0b0aeed..ba1bdfe00d 100644 --- a/bacula/src/dird/backup.c +++ b/bacula/src/dird/backup.c @@ -245,9 +245,7 @@ bail_out: set_jcr_job_status(jcr, JS_ErrorTerminated); Dmsg1(400, "wait for sd. use=%d\n", jcr->use_count()); /* Cancel SD */ - if (jcr->store_bsock) { - jcr->store_bsock->fsend("cancel Job=%s\n", jcr->Job); - } + cancel_storage_daemon_job(jcr); wait_for_storage_daemon_termination(jcr); Dmsg1(400, "after wait for sd. 
use=%d\n", jcr->use_count()); return false; @@ -297,6 +295,11 @@ int wait_for_job_termination(JCR *jcr) } bnet_sig(fd, BNET_TERMINATE); /* tell Client we are terminating */ + /* Force cancel in SD if failing */ + if (job_canceled(jcr) || !fd_ok) { + cancel_storage_daemon_job(jcr); + } + /* Note, the SD stores in jcr->JobFiles/ReadBytes/JobBytes/Errors */ wait_for_storage_daemon_termination(jcr); diff --git a/bacula/src/dird/dird.c b/bacula/src/dird/dird.c index d34acd8efb..45ae5a6818 100644 --- a/bacula/src/dird/dird.c +++ b/bacula/src/dird/dird.c @@ -228,15 +228,17 @@ int main (int argc, char *argv[]) Jmsg((JCR *)NULL, M_ERROR_TERM, 0, _("Please correct configuration file: %s\n"), configfile); } - if (background) { - daemon_start(); - init_stack_dump(); /* grab new pid */ + if (!test_config) { /* we don't need to do this block in test mode */ + if (background) { + daemon_start(); + init_stack_dump(); /* grab new pid */ + } + + /* Create pid must come after we are a daemon -- so we have our final pid */ + create_pid_file(director->pid_directory, "bacula-dir", get_first_port_host_order(director->DIRaddrs)); + read_state_file(director->working_directory, "bacula-dir", get_first_port_host_order(director->DIRaddrs)); } - /* Create pid must come after we are a daemon -- so we have our final pid */ - create_pid_file(director->pid_directory, "bacula-dir", get_first_port_host_order(director->DIRaddrs)); - read_state_file(director->working_directory, "bacula-dir", get_first_port_host_order(director->DIRaddrs)); - drop(uid, gid); /* reduce privileges if requested */ if (!check_catalog()) { @@ -317,6 +319,7 @@ void terminate_dird(int sig) static bool already_here = false; if (already_here) { /* avoid recursive temination problems */ + bmicrosleep(2, 0); /* yield */ exit(1); } already_here = true; @@ -324,7 +327,6 @@ void terminate_dird(int sig) generate_daemon_event(NULL, "Exit"); write_state_file(director->working_directory, "bacula-dir", 
get_first_port_host_order(director->DIRaddrs)); delete_pid_file(director->pid_directory, "bacula-dir", get_first_port_host_order(director->DIRaddrs)); -// signal(SIGCHLD, SIG_IGN); /* don't worry about children now */ term_scheduler(); term_job_server(); if (runjob) { diff --git a/bacula/src/dird/job.c b/bacula/src/dird/job.c index 70be904b25..406a43e3bb 100644 --- a/bacula/src/dird/job.c +++ b/bacula/src/dird/job.c @@ -427,6 +427,46 @@ bool cancel_job(UAContext *ua, JCR *jcr) return true; } +void cancel_storage_daemon_job(JCR *jcr) +{ + UAContext *ua = new_ua_context(jcr); + JCR *control_jcr = new_control_jcr("*JobCancel*", JT_SYSTEM); + BSOCK *sd; + + ua->jcr = control_jcr; + if (jcr->store_bsock) { + if (!ua->jcr->wstorage) { + if (jcr->rstorage) { + copy_wstorage(ua->jcr, jcr->rstorage, _("Job resource")); + } else { + copy_wstorage(ua->jcr, jcr->wstorage, _("Job resource")); + } + } else { + USTORE store; + if (jcr->rstorage) { + store.store = jcr->rstore; + } else { + store.store = jcr->wstore; + } + set_wstorage(ua->jcr, &store); + } + + if (!connect_to_storage_daemon(ua->jcr, 10, SDConnectTimeout, 1)) { + goto bail_out; + } + Dmsg0(200, "Connected to storage daemon\n"); + sd = ua->jcr->store_bsock; + sd->fsend("cancel Job=%s\n", jcr->Job); + while (sd->recv() >= 0) { + } + sd->signal(BNET_TERMINATE); + sd->close(); + ua->jcr->store_bsock = NULL; + } +bail_out: + free_jcr(control_jcr); + free_ua_context(ua); +} static void job_monitor_destructor(watchdog_t *self) { diff --git a/bacula/src/dird/protos.h b/bacula/src/dird/protos.h index 10d27e31eb..c0ad0d5e46 100644 --- a/bacula/src/dird/protos.h +++ b/bacula/src/dird/protos.h @@ -128,6 +128,7 @@ extern void create_clones(JCR *jcr); extern bool create_restore_bootstrap_file(JCR *jcr); extern void dird_free_jcr(JCR *jcr); extern void dird_free_jcr_pointers(JCR *jcr); +extern void cancel_storage_daemon_job(JCR *jcr); /* migration.c */ extern bool do_migration(JCR *jcr); diff --git a/bacula/src/dird/restore.c 
b/bacula/src/dird/restore.c index 446aac6881..978f81476a 100644 --- a/bacula/src/dird/restore.c +++ b/bacula/src/dird/restore.c @@ -246,6 +246,10 @@ void restore_cleanup(JCR *jcr, int TermCode) jcr->unlink_bsr = false; } + if (job_canceled(jcr)) { + cancel_storage_daemon_job(jcr); + } + switch (TermCode) { case JS_Terminated: if (jcr->ExpectedFiles > jcr->jr.JobFiles) { diff --git a/bacula/src/dird/ua_cmds.c b/bacula/src/dird/ua_cmds.c index d312e5b540..ed49b1a3ff 100644 --- a/bacula/src/dird/ua_cmds.c +++ b/bacula/src/dird/ua_cmds.c @@ -116,8 +116,8 @@ static struct cmdstruct commands[] = { { NT_("list"), list_cmd, _("list [pools | jobs | jobtotals | media | files ]; from catalog")}, { NT_("label"), label_cmd, _("label a tape")}, { NT_("llist"), llist_cmd, _("full or long list like list command")}, - { NT_("memory"), memory_cmd, _("print current memory usage")}, { NT_("messages"), messagescmd, _("messages")}, + { NT_("memory"), memory_cmd, _("print current memory usage")}, { NT_("mount"), mount_cmd, _("mount ")}, { NT_("prune"), prunecmd, _("prune expired records from catalog")}, { NT_("purge"), purgecmd, _("purge records from catalog")}, diff --git a/bacula/src/dird/verify.c b/bacula/src/dird/verify.c index 85651452b4..63cecc57f1 100644 --- a/bacula/src/dird/verify.c +++ b/bacula/src/dird/verify.c @@ -211,7 +211,7 @@ bool do_verify(JCR *jcr) */ set_jcr_job_status(jcr, JS_Blocked); if (!connect_to_file_daemon(jcr, 10, FDConnectTimeout, 1)) { - return false; + goto bail_out; } set_jcr_job_status(jcr, JS_Running); @@ -220,12 +220,12 @@ bool do_verify(JCR *jcr) Dmsg0(30, ">filed: Send include list\n"); if (!send_include_list(jcr)) { - return false; + goto bail_out; } Dmsg0(30, ">filed: Send exclude list\n"); if (!send_exclude_list(jcr)) { - return false; + goto bail_out; } /* @@ -248,7 +248,7 @@ bool do_verify(JCR *jcr) } bnet_fsend(fd, storaddr, jcr->rstore->address, jcr->rstore->SDDport); if (!response(jcr, fd, OKstore, "Storage", DISPLAY_ERROR)) { - return false; 
+ goto bail_out; } /* @@ -256,12 +256,12 @@ bool do_verify(JCR *jcr) */ if (!send_bootstrap_file(jcr, fd) || !response(jcr, fd, OKbootstrap, "Bootstrap", DISPLAY_ERROR)) { - return false; + goto bail_out; } if (!jcr->RestoreBootstrap) { Jmsg0(jcr, M_FATAL, 0, _("Deprecated feature ... use bootstrap.\n")); - return false; + goto bail_out; } level = "volume"; @@ -275,19 +275,19 @@ bool do_verify(JCR *jcr) default: Jmsg2(jcr, M_FATAL, 0, _("Unimplemented Verify level %d(%c)\n"), jcr->JobLevel, jcr->JobLevel); - return false; + goto bail_out; } if (!send_runscripts_commands(jcr)) { - return false; + goto bail_out; } /* * Send verify command/level to File daemon */ - bnet_fsend(fd, verifycmd, level); + fd->fsend(verifycmd, level); if (!response(jcr, fd, OKverify, "Verify", DISPLAY_ERROR)) { - return false; + goto bail_out; } /* @@ -328,7 +328,7 @@ bool do_verify(JCR *jcr) default: Jmsg1(jcr, M_FATAL, 0, _("Unimplemented verify level %d\n"), jcr->JobLevel); - return false; + goto bail_out; } stat = wait_for_job_termination(jcr); @@ -336,6 +336,9 @@ bool do_verify(JCR *jcr) verify_cleanup(jcr, stat); return true; } + +bail_out: + verify_cleanup(jcr, JS_ErrorTerminated); return false; } @@ -373,6 +376,10 @@ void verify_cleanup(JCR *jcr, int TermCode) update_job_end(jcr, TermCode); + if (job_canceled(jcr)) { + cancel_storage_daemon_job(jcr); + } + if (jcr->unlink_bsr && jcr->RestoreBootstrap) { unlink(jcr->RestoreBootstrap); jcr->unlink_bsr = false; diff --git a/bacula/src/filed/filed.c b/bacula/src/filed/filed.c index 3f96b0d913..277e32d994 100644 --- a/bacula/src/filed/filed.c +++ b/bacula/src/filed/filed.c @@ -232,6 +232,15 @@ int main (int argc, char *argv[]) void terminate_filed(int sig) { + static bool already_here = false; + + if (already_here) { + bmicrosleep(2, 0); /* yield */ + exit(1); /* prevent loops */ + } + already_here = true; + stop_watchdog(); + bnet_stop_thread_server(server_tid); generate_daemon_event(NULL, "Exit"); 
write_state_file(me->working_directory, "bacula-fd", get_first_port_host_order(me->FDaddrs)); @@ -240,12 +249,12 @@ void terminate_filed(int sig) if (configfile != NULL) { free(configfile); } + if (debug_level > 0) { print_memory_pool_stats(); } - free_config_resources(); term_msg(); - stop_watchdog(); + free_config_resources(); cleanup_crypto(); close_memory_pool(); /* release free memory in pool */ sm_dump(false); /* dump orphaned buffers */ diff --git a/bacula/src/lib/tls.c b/bacula/src/lib/tls.c index 26c03c1f52..71a6fa2841 100644 --- a/bacula/src/lib/tls.c +++ b/bacula/src/lib/tls.c @@ -488,7 +488,7 @@ static inline bool openssl_bsock_session_start(BSOCK *bsock, bool server) tv.tv_sec = 10; tv.tv_usec = 0; /* Block until we can read */ - select(fdmax, &fdset, NULL, &fdset, &tv); + select(fdmax, &fdset, NULL, NULL, &tv); break; case SSL_ERROR_WANT_WRITE: /* If we timeout of a select, this will be unset */ @@ -497,7 +497,7 @@ static inline bool openssl_bsock_session_start(BSOCK *bsock, bool server) tv.tv_sec = 10; tv.tv_usec = 0; /* Block until we can write */ - select(fdmax, NULL, &fdset, &fdset, &tv); + select(fdmax, NULL, &fdset, NULL, &tv); break; default: /* Socket Error Occured */ diff --git a/bacula/src/stored/acquire.c b/bacula/src/stored/acquire.c index 5736d70334..9bb22a7809 100644 --- a/bacula/src/stored/acquire.c +++ b/bacula/src/stored/acquire.c @@ -292,7 +292,14 @@ get_out: Dmsg2(50, "Dec reserve=%d dev=%s\n", dev->reserved_device, dev->print_name()); dcr->reserved_device = false; } - dev->dunblock(DEV_LOCKED); + /* + * Normally we are blocked, but in at least one error case above + * we are not blocked because we unsuccessfully tried changing + * devices. 
+ */ + if (dev->is_blocked()) { + dev->dunblock(DEV_LOCKED); + } Dmsg1(950, "jcr->dcr=%p\n", jcr->dcr); return ok; } diff --git a/bacula/src/stored/dircmd.c b/bacula/src/stored/dircmd.c index 205fe140df..396e68a6e4 100644 --- a/bacula/src/stored/dircmd.c +++ b/bacula/src/stored/dircmd.c @@ -148,7 +148,7 @@ void *handle_connection_request(void *arg) int i; bool found, quit; int bnet_stat = 0; - char name[MAX_NAME_LENGTH]; + char name[500]; if (bs->recv() <= 0) { Emsg0(M_ERROR, 0, _("Connection request failed.\n")); @@ -159,7 +159,7 @@ void *handle_connection_request(void *arg) /* * Do a sanity check on the message received */ - if (bs->msglen < 25 || bs->msglen > (int)sizeof(name)-25) { + if (bs->msglen < 25 || bs->msglen > (int)sizeof(name)) { Emsg1(M_ERROR, 0, _("Invalid connection. Len=%d\n"), bs->msglen); bnet_close(bs); return NULL; @@ -294,13 +294,11 @@ static bool cancel_cmd(JCR *cjcr) if (!(jcr=get_jcr_by_full_name(Job))) { bnet_fsend(dir, _("3904 Job %s not found.\n"), Job); } else { - jcr->lock(); oldStatus = jcr->JobStatus; set_jcr_job_status(jcr, JS_Canceled); if (!jcr->authenticated && oldStatus == JS_WaitFD) { pthread_cond_signal(&jcr->job_start_wait); /* wake waiting thread */ } - jcr->unlock(); if (jcr->file_bsock) { bnet_sig(jcr->file_bsock, BNET_TERMINATE); } else { diff --git a/bacula/src/stored/mount.c b/bacula/src/stored/mount.c index 102ef85c4b..0b91ce6b74 100644 --- a/bacula/src/stored/mount.c +++ b/bacula/src/stored/mount.c @@ -276,6 +276,8 @@ read_volume: bstrncpy(VolumeName, dcr->VolumeName, sizeof(VolumeName)); bstrncpy(dcr->VolumeName, dev->VolHdr.VolumeName, sizeof(dcr->VolumeName)); if (!dir_get_volume_info(dcr, GET_VOL_INFO_FOR_WRITE)) { + POOL_MEM vol_info_msg; + pm_strcpy(vol_info_msg, jcr->dir_bsock->msg); /* save error message */ /* Restore desired volume name, note device info out of sync */ /* This gets the info regardless of the Pool */ bstrncpy(dcr->VolumeName, dev->VolHdr.VolumeName, sizeof(dcr->VolumeName)); @@ -293,7 +295,7 
@@ read_volume: " Current Volume \"%s\" not acceptable because:\n" " %s"), dcrVolCatInfo.VolCatName, dev->VolHdr.VolumeName, - jcr->dir_bsock->msg); + vol_info_msg.c_str()); ask = true; /* Restore saved DCR before continuing */ bstrncpy(dcr->VolumeName, VolumeName, sizeof(dcr->VolumeName)); diff --git a/bacula/src/stored/stored.c b/bacula/src/stored/stored.c index 996f4fc358..070e788d8f 100644 --- a/bacula/src/stored/stored.c +++ b/bacula/src/stored/stored.c @@ -534,6 +534,7 @@ void terminate_stored(int sig) JCR *jcr; if (in_here) { /* prevent loops */ + bmicrosleep(2, 0); /* yield */ exit(1); } in_here = true; @@ -600,10 +601,10 @@ void terminate_stored(int sig) if (debug_level > 10) { print_memory_pool_stats(); } - term_reservations_lock(); term_msg(); cleanup_crypto(); free_volume_list(); + term_reservations_lock(); close_memory_pool(); sm_dump(false); /* dump orphaned buffers */ diff --git a/bacula/technotes-2.1 b/bacula/technotes-2.1 index 5be0b7b220..0c4fbb8cef 100644 --- a/bacula/technotes-2.1 +++ b/bacula/technotes-2.1 @@ -1,12 +1,41 @@ Technical notes on version 2.2 General: +Release Version 2.2.4 +14Sep07 +kes Increase size of name string when FD making connection to SD. + May fix bug #953. +13Sep07 +kes Add code to try to fix bug #908. +kes Add waits to multiple exit detection code to try to force pid + file to always be deleted. +kes Restore good dev.tar.gz to rescue set appropriate binary property. + This fixes bug #950. +kes Fix seg fault in error exit of acquire_for_read after unsuccessfully + trying to switch drives by checking for blocking before unblocking. + Fixes bug #906. +kes Cancel storage daemon in all cases where FD reports error. This + should fix virtually all cases of bug #920. 12Sep07 +kes Fix error message that was clobbered when Dir tells SD it does not + have write permission on Volume. This should fix a minor point + in bug #942, but not the main problem. +kes Add code to cancel job in SD if FD connection fails. 
This should + fix bug #920. +kes Add code in FD exit to prevent loops and a crash on FreeBSD. kes Fix migration code to get correct Volume name with multiple volumes by skipping |. Fixes bug #936. kes Implement patch supplied by Landon to fix bug #944 where using TLS with bconsole uses 99+% of the CPU. +kes Note, you need GTK >= 2.10 to be able to link the Tray Monitor + program. kes Move patches into patches directory. +11Sep07 +ebl Fix bug #946 about "bacula-dir -t" which doesn't work + as expected. +09Sep07 +ebl Using "m" in bconsole will show messages like before, + and not memory usage. Release Version 2.2.3 kes Note, you need GTK >= 2.10 to be able to link the Tray Monitor -- 2.39.5