From fe3cc7407f557f6881075dd6e546fef71a5afdee Mon Sep 17 00:00:00 2001 From: Kern Sibbald Date: Tue, 14 Mar 2006 19:37:20 +0000 Subject: [PATCH] - Add more jcr methods and make mutex and use_count private. - Create lock/unlock methods for jcr. - Fix PostgreSQL bug doing sql_data_seek() by explicitly reading records to get to seek position. - Integrate patch from bug #561 to correct conio.c signal definitions. - Fix Rescheduling failed Jobs. Ensure that SD message thread terminates correctly by doing pthread_kill(). Do not destroy SD cond wait variable between executions of the job. Use local mutex for cond variable to avoid blocking jcr chain. Fix poor use of jcr use count in jobq.c for restarted jobs. - Fix obsolete usage of foreach_dlist() to use foreach_jcr() in lib/jcr.c -- prevents locking the jcr chain. - Apply patch from bug #564, which corrects listing volumes with multiple autochangers. Apply same fix to next volume list. - Fix bug #562 where restore bootstrap file is not unique. - More details in implementing David's migration syntax proposal. - Save and restore dcr when swapping drives so that if we have a write dcr it is not lost. - Use new routine lock_reservations() to lock the reservations system, and call it while looking for a volume in askdir.c. This could possibly fix bug #543. - Stop SD command loop if job is canceled. 
git-svn-id: https://bacula.svn.sourceforge.net/svnroot/bacula/trunk@2832 91ce42f0-d328-0410-95d8-f526ca767f89 --- bacula/ReleaseNotes | 36 +++++++- bacula/kernstodo | 2 + bacula/kes-1.38 | 28 ++++++ bacula/kes-1.39 | 24 +++++ bacula/src/cats/sql_find.c | 27 +++--- bacula/src/console/conio.c | 6 +- bacula/src/dird/backup.c | 24 +++-- bacula/src/dird/bsr.c | 13 ++- bacula/src/dird/catreq.c | 5 +- bacula/src/dird/fd_cmds.c | 6 +- bacula/src/dird/job.c | 24 ++--- bacula/src/dird/jobq.c | 47 +++++----- bacula/src/dird/migrate.c | 70 +++++++++++++- bacula/src/dird/msgchan.c | 41 +++++---- bacula/src/dird/next_vol.c | 3 +- bacula/src/dird/ua_output.c | 11 ++- bacula/src/dird/ua_prune.c | 4 +- bacula/src/dird/ua_restore.c | 9 +- bacula/src/dird/ua_status.c | 9 ++ bacula/src/filed/backup.c | 4 +- bacula/src/filed/job.c | 7 +- bacula/src/filed/restore.c | 4 +- bacula/src/filed/status.c | 4 +- bacula/src/filed/verify.c | 136 +++++++++++++-------------- bacula/src/filed/verify_vol.c | 158 ++++++++++++++++---------------- bacula/src/jcr.h | 17 +++- bacula/src/lib/jcr.c | 48 +++++----- bacula/src/lib/rwlock.c | 17 ++-- bacula/src/lib/rwlock.h | 17 ++-- bacula/src/stored/acquire.c | 8 ++ bacula/src/stored/askdir.c | 17 ++-- bacula/src/stored/autochanger.c | 42 +++++---- bacula/src/stored/bls.c | 2 +- bacula/src/stored/butil.c | 6 +- bacula/src/stored/dev.c | 8 +- bacula/src/stored/dircmd.c | 4 +- bacula/src/stored/fd_cmds.c | 17 ++-- bacula/src/stored/job.c | 10 +- bacula/src/stored/mac.c | 62 +++++++------ bacula/src/stored/protos.h | 6 +- bacula/src/stored/read_record.c | 4 +- bacula/src/stored/reserve.c | 153 ++++++++++++++++++++++--------- bacula/src/stored/stored.c | 17 ++-- bacula/src/stored/stored_conf.h | 2 +- bacula/src/version.h | 4 +- 45 files changed, 701 insertions(+), 462 deletions(-) diff --git a/bacula/ReleaseNotes b/bacula/ReleaseNotes index 58461edee3..53f9bbb104 100644 --- a/bacula/ReleaseNotes +++ b/bacula/ReleaseNotes @@ -1,9 +1,16 @@ - Release Notes for 
Bacula 1.38.6-beta3 + Release Notes for Bacula 1.38.6-beta5 - Bacula code: Total files = 418 Total lines = 136,328 (*.h *.c *.in) + Bacula code: Total files = 419 Total lines = 137,078 (*.h *.c *.in) 20,440 additional lines of code since version 1.36.3 +!!!! Important !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + If you are compiling for a 64 bit machine, you need to ensure + that the code is compiled with the -O0 (- oh zero) option otherwise + the SD will crash on most all operations -- apparently due to + a compiler bug in gcc's 64 bit code generation. +!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + New features: - For autochanger get Scratch tape if in autochanger if no appendable Volumes are available. @@ -20,8 +27,7 @@ New features: to the unique job identification that Bacula creates for each job. - The job report for Backups has a few more user friendly ways - of displaying the information submitted by John Kodis - . + of displaying the information. - The wait command can now be made to wait for jobids. - New command line keywords are permitted in update volume. They are Inchanger=yes/no, slot=nn. @@ -43,6 +49,26 @@ Major bug fixes: Minor bug fixes: - See below: +Release 1.38.6 beta5 14Mar06 +- Add more jcr methods and make mutex and use_count private. +- Create lock/unlock methods for jcr. +- Fix PostgreSQL bug doing sql_data_seeek() by explicitly reading + records to get to seek position. +- Integrate patch from bug #561 to correct conio.c signal definitions. +- Fix Rescheduling failed Jobs. Ensure that SD message thread + terminates correctly by doing pthread_kill(). Do not destroy + SD cond wait variable between executions of the job. Use local + mutex for cond variable to avoid blocking jcr chain. Fix poor + use of jcr use count in jobq.c for restarted jobs. +- Fix obsolete usage of foreach_dlist() to use foreach_jcr() in + lib/jcr.c -- prevents locking the jcr chaing. 
+- Apply patch from bug #564, which corrects listing volumes with + multiple autochangers. Apply same fix to next volume list. +- Fix bug #562 where restore bootstrap file is not unique. +- Use new routine lock_reservations() to lock the reservations + system, and call it while looking for a volume in askdir.c. + This could possibly fix bug #543. +- Stop SD command loop if job is canceled. Release 1.38.6 beta3 4Mar06 04Mar06 @@ -55,7 +81,7 @@ Release 1.38.6 beta3 4Mar06 be the numeric (non-unique) job id. - Allow listing by any of the above. - Add the user friendly job report code for reporting job elapsed time - and rates with suffexes submitted by John Kodis + and rates with suffexes. - Add Priority and JobLevel as Python settable items. - Use TEMPORARY table creation where the table is created by Bacula. diff --git a/bacula/kernstodo b/bacula/kernstodo index 071337cd97..017b3b4fa9 100644 --- a/bacula/kernstodo +++ b/bacula/kernstodo @@ -16,6 +16,8 @@ Document: Priority: For 1.39: +- Some users claim that they must do two prune commands to get a + Volume marked as purged. - Print warning message if LANG environment variable does not specify UTF-8. === Migration from David === diff --git a/bacula/kes-1.38 b/bacula/kes-1.38 index 11b8044742..4f2221c8f5 100644 --- a/bacula/kes-1.38 +++ b/bacula/kes-1.38 @@ -2,6 +2,34 @@ Kern Sibbald General: +14Mar06 +- Add more jcr methods and make mutex and use_count private. +- Create lock/unlock methods for jcr. +- Fix PostgreSQL bug doing sql_data_seeek() by explicitly reading + records to get to seek position. +- Integrate patch from bug #561 to correct conio.c signal definitions. +- Fix Rescheduling failed Jobs. Ensure that SD message thread + terminates correctly by doing pthread_kill(). Do not destroy + SD cond wait variable between executions of the job. Use local + mutex for cond variable to avoid blocking jcr chain. Fix poor + use of jcr use count in jobq.c for restarted jobs. 
+- Fix obsolete usage of foreach_dlist() to use foreach_jcr() in + lib/jcr.c -- prevents locking the jcr chaing. +- Apply patch from bug #564, which corrects listing volumes with + multiple autochangers. Apply same fix to next volume list. +- Fix bug #562 where restore bootstrap file is not unique. +- Use new routine lock_reservations() to lock the reservations + system, and call it while looking for a volume in askdir.c. + This could possibly fix bug #543. +- Stop SD command loop if job is canceled. + +09Mar06 +- Use John Kodis' edit_with_suffix code to edit VolBytes. +- Backport some fixes for acquire.c. Most importantly, to explicitly + have a reserved_device flag for acquire_for_read(). Hopefully + this will correct the remaining issues with failed restores holding + a drive. +- Add a job_canceled() check in SD command loop. Release 1.38.6 beta3 4Mar06 04Mar06 diff --git a/bacula/kes-1.39 b/bacula/kes-1.39 index 5da536694b..8ba0f81523 100644 --- a/bacula/kes-1.39 +++ b/bacula/kes-1.39 @@ -2,6 +2,30 @@ Kern Sibbald General: +14Mar06 +- Add more jcr methods and make mutex and use_count private. +- Create lock/unlock methods for jcr. +- Fix PostgreSQL bug doing sql_data_seeek() by explicitly reading + records to get to seek position. +- Integrate patch from bug #561 to correct conio.c signal definitions. +- Fix Rescheduling failed Jobs. Ensure that SD message thread + terminates correctly by doing pthread_kill(). Do not destroy + SD cond wait variable between executions of the job. Use local + mutex for cond variable to avoid blocking jcr chain. Fix poor + use of jcr use count in jobq.c for restarted jobs. +- Fix obsolete usage of foreach_dlist() to use foreach_jcr() in + lib/jcr.c -- prevents locking the jcr chaing. +- Apply patch from bug #564, which corrects listing volumes with + multiple autochangers. Apply same fix to next volume list. +- Fix bug #562 where restore bootstrap file is not unique. 
+- More details in implementing David's migration syntax proposal. +- Save and restore dcr when swapping drives so that if we have + a write dcr it is not lost. +- Use new routine lock_reservations() to lock the reservations + system, and call it while looking for a volume in askdir.c. + This could possibly fix bug #543. +- Stop SD command loop if job is canceled. + 08Mar06 - Remove old code from findlib/create_file.c - Rename mac.c migrate.c diff --git a/bacula/src/cats/sql_find.c b/bacula/src/cats/sql_find.c index 641a1e7700..92f6a2bd22 100644 --- a/bacula/src/cats/sql_find.c +++ b/bacula/src/cats/sql_find.c @@ -262,7 +262,7 @@ db_find_last_jobid(JCR *jcr, B_DB *mdb, const char *Name, JOB_DBR *jr) int db_find_next_volume(JCR *jcr, B_DB *mdb, int item, bool InChanger, MEDIA_DBR *mr) { - SQL_ROW row; + SQL_ROW row = NULL; int numrows; const char *order; @@ -313,23 +313,26 @@ db_find_next_volume(JCR *jcr, B_DB *mdb, int item, bool InChanger, MEDIA_DBR *mr } numrows = sql_num_rows(mdb); - if (item > numrows) { - Mmsg2(&mdb->errmsg, _("Request for Volume item %d greater than max %d\n"), + if (item > numrows || item < 1) { + Mmsg2(&mdb->errmsg, _("Request for Volume item %d greater than max %d or less than 1\n"), item, numrows); db_unlock(mdb); return 0; } - /* Seek to desired item - * Note, we use base 1; SQL uses base 0 + /* Note, we previously seeked to the row using: + * sql_data_seek(mdb, item-1); + * but this failed on PostgreSQL, so now we loop + * over all the records. This should not be too horrible since + * the maximum Volumes we look at in any case is 20. 
*/ - sql_data_seek(mdb, item-1); - - if ((row = sql_fetch_row(mdb)) == NULL) { - Mmsg1(&mdb->errmsg, _("No Volume record found for item %d.\n"), item); - sql_free_result(mdb); - db_unlock(mdb); - return 0; + while (item-- > 0) { + if ((row = sql_fetch_row(mdb)) == NULL) { + Mmsg1(&mdb->errmsg, _("No Volume record found for item %d.\n"), item); + sql_free_result(mdb); + db_unlock(mdb); + return 0; + } } /* Return fields in Media Record */ diff --git a/bacula/src/console/conio.c b/bacula/src/console/conio.c index 3a9dfe14aa..9fc5c4955b 100755 --- a/bacula/src/console/conio.c +++ b/bacula/src/console/conio.c @@ -9,7 +9,7 @@ */ /* - Copyright (C) 1981-2005 Kern Sibbald + Copyright (C) 1981-2006 Kern Sibbald Yes, that is 1981 no error. This program is free software; you can redistribute it and/or @@ -961,12 +961,8 @@ static void rawmode(FILE *input) /* Defaults, the main program can override these */ signal(SIGQUIT, SIG_IGN); signal(SIGHUP, SIG_IGN); -// signal(SIGSTOP, SIG_IGN); signal(SIGINT, sigintcatcher); signal(SIGWINCH, SIG_IGN); - signal(SIGQUIT, SIG_IGN); -// signal(SIGCHLD, SIG_IGN); -// signal(SIGTSTP, SIG_IGN); if (!termtype) { printf("Cannot get terminal type.\n"); diff --git a/bacula/src/dird/backup.c b/bacula/src/dird/backup.c index 79e041602e..292857a207 100644 --- a/bacula/src/dird/backup.c +++ b/bacula/src/dird/backup.c @@ -170,27 +170,27 @@ bool do_backup(JCR *jcr) Dmsg0(150, "Storage daemon connection OK\n"); if (!bnet_fsend(jcr->store_bsock, "run")) { - return false; + goto bail_out; } set_jcr_job_status(jcr, JS_WaitFD); if (!connect_to_file_daemon(jcr, 10, FDConnectTimeout, 1)) { - return false; + goto bail_out; } set_jcr_job_status(jcr, JS_Running); fd = jcr->file_bsock; if (!send_include_list(jcr)) { - return false; + goto bail_out; } if (!send_exclude_list(jcr)) { - return false; + goto bail_out; } if (!send_level_command(jcr)) { - return false; + goto bail_out; } /* @@ -212,18 +212,18 @@ bool do_backup(JCR *jcr) bnet_fsend(fd, storaddr, 
store->address, store->SDDport, tls_need); if (!response(jcr, fd, OKstore, "Storage", DISPLAY_ERROR)) { - return false; + goto bail_out; } if (!send_run_before_and_after_commands(jcr)) { - return false; + goto bail_out; } /* Send backup command */ bnet_fsend(fd, backupcmd); if (!response(jcr, fd, OKbackup, "backup", DISPLAY_ERROR)) { - return false; + goto bail_out; } /* Pickup Job termination data */ @@ -233,6 +233,14 @@ bool do_backup(JCR *jcr) return true; } return false; + +/* Come here only after starting SD thread */ +bail_out: + set_jcr_job_status(jcr, JS_ErrorTerminated); + Dmsg1(400, "wait for sd. use=%d\n", jcr->use_count()); + wait_for_storage_daemon_termination(jcr); + Dmsg1(400, "after wait for sd. use=%d\n", jcr->use_count()); + return false; } diff --git a/bacula/src/dird/bsr.c b/bacula/src/dird/bsr.c index 2321580aa2..2b2e77526e 100644 --- a/bacula/src/dird/bsr.c +++ b/bacula/src/dird/bsr.c @@ -191,6 +191,9 @@ bool complete_bsr(UAContext *ua, RBSR *bsr) return true; } +static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; +static uint32_t uniq = 0; + void make_unique_restore_filename(UAContext *ua, POOLMEM **fname) { JCR *jcr = ua->jcr; @@ -199,10 +202,16 @@ void make_unique_restore_filename(UAContext *ua, POOLMEM **fname) Mmsg(fname, "%s", ua->argv[i]); jcr->unlink_bsr = false; } else { - Mmsg(fname, "%s/%s.restore.%s.bsr", working_directory, my_name, - jcr->Job); + P(mutex); + uniq++; + V(mutex); + Mmsg(fname, "%s/%s.restore.%u.bsr", working_directory, my_name, uniq); jcr->unlink_bsr = true; } + if (jcr->RestoreBootstrap) { + free(jcr->RestoreBootstrap); + } + jcr->RestoreBootstrap = bstrdup(*fname); } /* diff --git a/bacula/src/dird/catreq.c b/bacula/src/dird/catreq.c index 95deae2086..8477d8bbc8 100644 --- a/bacula/src/dird/catreq.c +++ b/bacula/src/dird/catreq.c @@ -122,10 +122,9 @@ void catalog_request(JCR *jcr, BSOCK *bs) ok = db_get_pool_record(jcr, jcr->db, &pr); if (ok) { mr.PoolId = pr.PoolId; - if (jcr->store->StorageId) { - 
mr.StorageId = jcr->store->StorageId; - } + mr.StorageId = jcr->store->StorageId; ok = find_next_volume_for_append(jcr, &mr, index, true /*permit create new vol*/); + Dmsg3(100, "find_media idx=%d ok=%d vol=%s\n", index, ok, mr.VolumeName); } /* * Send Find Media response to Storage daemon diff --git a/bacula/src/dird/fd_cmds.c b/bacula/src/dird/fd_cmds.c index dfc4cc11db..8eef4c3e64 100644 --- a/bacula/src/dird/fd_cmds.c +++ b/bacula/src/dird/fd_cmds.c @@ -243,7 +243,7 @@ bool send_level_command(JCR *jcr) /* * Send either an Included or an Excluded list to FD */ -static int send_fileset(JCR *jcr) +static bool send_fileset(JCR *jcr) { FILESET *fileset = jcr->fileset; BSOCK *fd = jcr->file_bsock; @@ -385,11 +385,11 @@ static int send_fileset(JCR *jcr) if (!response(jcr, fd, OKinc, "Include", DISPLAY_ERROR)) { goto bail_out; } - return 1; + return true; bail_out: set_jcr_job_status(jcr, JS_ErrorTerminated); - return 0; + return false; } diff --git a/bacula/src/dird/job.c b/bacula/src/dird/job.c index 607835c7c5..280159e84a 100644 --- a/bacula/src/dird/job.c +++ b/bacula/src/dird/job.c @@ -90,7 +90,7 @@ bool setup_job(JCR *jcr) { int errstat; - P(jcr->mutex); + jcr->lock(); sm_check(__FILE__, __LINE__, true); init_msg(jcr, jcr->messages); @@ -102,6 +102,9 @@ bool setup_job(JCR *jcr) } jcr->term_wait_inited = true; + create_unique_job_name(jcr, jcr->job->hdr.name); + set_jcr_job_status(jcr, JS_Created); + jcr->unlock(); /* * Open database @@ -124,8 +127,6 @@ bool setup_job(JCR *jcr) /* * Create Job record */ - create_unique_job_name(jcr, jcr->job->hdr.name); - set_jcr_job_status(jcr, JS_Created); init_jcr_job_record(jcr); if (!db_create_job_record(jcr, jcr->db, &jcr->jr)) { Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db)); @@ -148,7 +149,6 @@ bool setup_job(JCR *jcr) Dmsg0(200, "Add jrc to work queue\n"); - V(jcr->mutex); return true; bail_out: @@ -156,7 +156,6 @@ bail_out: free_memory(jcr->fname); jcr->fname = NULL; } - V(jcr->mutex); return false; } @@ -784,10 
+783,6 @@ void dird_free_jcr_pointers(JCR *jcr) free_pool_memory(jcr->client_uname); jcr->client_uname = NULL; } - if (jcr->term_wait_inited) { - pthread_cond_destroy(&jcr->term_wait); - jcr->term_wait_inited = false; - } if (jcr->attr) { free_pool_memory(jcr->attr); jcr->attr = NULL; @@ -808,6 +803,10 @@ void dird_free_jcr(JCR *jcr) Dmsg0(200, "Start dird free_jcr\n"); dird_free_jcr_pointers(jcr); + if (jcr->term_wait_inited) { + pthread_cond_destroy(&jcr->term_wait); + jcr->term_wait_inited = false; + } /* Delete lists setup to hold storage pointers */ if (jcr->storage) { @@ -968,15 +967,8 @@ bool create_restore_bootstrap_file(JCR *jcr) free_bsr(rx.bsr); return false; } - if (jcr->RestoreBootstrap) { - free(jcr->RestoreBootstrap); - } - POOLMEM *fname = get_pool_memory(PM_MESSAGE); - make_unique_restore_filename(ua, &fname); - jcr->RestoreBootstrap = bstrdup(fname); free_ua_context(ua); free_bsr(rx.bsr); - free_pool_memory(fname); jcr->needs_sd = true; return true; } diff --git a/bacula/src/dird/jobq.c b/bacula/src/dird/jobq.c index 6ec9a5de51..4d79e77a3d 100755 --- a/bacula/src/dird/jobq.c +++ b/bacula/src/dird/jobq.c @@ -18,22 +18,17 @@ * */ /* - Copyright (C) 2003-2005 Kern Sibbald + Copyright (C) 2003-2006 Kern Sibbald This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of - the License, or (at your option) any later version. + modify it under the terms of the GNU General Public License + version 2 as amended with additional clauses defined in the + file LICENSE in the main source directory. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. 
- - You should have received a copy of the GNU General Public - License along with this program; if not, write to the Free - Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, - MA 02111-1307, USA. + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + the file LICENSE for additional details. */ @@ -182,7 +177,8 @@ void *sched_wait(void *arg) } /* Check every 30 seconds if canceled */ while (wtime > 0) { - Dmsg2(2300, "Waiting on sched time, jobid=%d secs=%d\n", jcr->JobId, wtime); + Dmsg3(2300, "Waiting on sched time, jobid=%d secs=%d use=%d\n", + jcr->JobId, wtime, jcr->use_count()); if (wtime > 30) { wtime = 30; } @@ -192,9 +188,8 @@ void *sched_wait(void *arg) } wtime = jcr->sched_time - time(NULL); } - P(jcr->mutex); /* lock jcr */ + Dmsg1(200, "resched use=%d\n", jcr->use_count()); jobq_add(jq, jcr); - V(jcr->mutex); free_jcr(jcr); /* we are done with jcr */ Dmsg0(2300, "Exit sched_wait\n"); return NULL; @@ -203,9 +198,6 @@ void *sched_wait(void *arg) /* * Add a job to the queue * jq is a queue that was created with jobq_init - * - * On entry jcr->mutex must be locked. 
- * */ int jobq_add(jobq_t *jq, JCR *jcr) { @@ -216,14 +208,14 @@ int jobq_add(jobq_t *jq, JCR *jcr) pthread_t id; wait_pkt *sched_pkt; - Dmsg3(2300, "jobq_add jobid=%d jcr=0x%x use_count=%d\n", jcr->JobId, jcr, jcr->use_count); + Dmsg3(2300, "jobq_add jobid=%d jcr=0x%x use_count=%d\n", jcr->JobId, jcr, jcr->use_count()); if (jq->valid != JOBQ_VALID) { Jmsg0(jcr, M_ERROR, 0, "Jobq_add queue not initialized.\n"); return EINVAL; } - jcr->use_count++; /* mark jcr in use by us */ - Dmsg3(2300, "jobq_add jobid=%d jcr=0x%x use_count=%d\n", jcr->JobId, jcr, jcr->use_count); + jcr->inc_use_count(); /* mark jcr in use by us */ + Dmsg3(2300, "jobq_add jobid=%d jcr=0x%x use_count=%d\n", jcr->JobId, jcr, jcr->use_count()); if (!job_canceled(jcr) && wtime > 0) { set_thread_concurrency(jq->max_workers + 2); sched_pkt = (wait_pkt *)malloc(sizeof(wait_pkt)); @@ -240,12 +232,12 @@ int jobq_add(jobq_t *jq, JCR *jcr) if ((stat = pthread_mutex_lock(&jq->mutex)) != 0) { berrno be; Jmsg1(jcr, M_ERROR, 0, _("pthread_mutex_lock: ERR=%s\n"), be.strerror(stat)); - jcr->use_count--; /* release jcr */ + free_jcr(jcr); /* release jcr */ return stat; } if ((item = (jobq_item_t *)malloc(sizeof(jobq_item_t))) == NULL) { - jcr->use_count--; /* release jcr */ + free_jcr(jcr); /* release jcr */ return ENOMEM; } item->jcr = jcr; @@ -444,10 +436,12 @@ void *jobq_server(void *arg) V(jq->mutex); /* Call user's routine here */ - Dmsg1(2300, "Calling user engine for jobid=%d\n", jcr->JobId); + Dmsg2(2300, "Calling user engine for jobid=%d use=%d\n", jcr->JobId, + jcr->use_count()); jq->engine(je->jcr); - Dmsg1(2300, "Back from user engine jobid=%d.\n", jcr->JobId); + Dmsg2(2300, "Back from user engine jobid=%d use=%d.\n", jcr->JobId, + jcr->use_count()); /* Reacquire job queue lock */ P(jq->mutex); @@ -490,11 +484,12 @@ void *jobq_server(void *arg) jcr->JobStatus = JS_WaitStartTime; jcr->SDJobStatus = 0; if (jcr->JobBytes == 0) { - Dmsg1(2300, "Requeue job=%d\n", jcr->JobId); + Dmsg2(2300, "Requeue job=%d 
use=%d\n", jcr->JobId, jcr->use_count()); jcr->JobStatus = JS_WaitStartTime; V(jq->mutex); jobq_add(jq, jcr); /* queue the job to run again */ P(jq->mutex); + free_jcr(jcr); /* release jcr */ free(je); /* free the job entry */ continue; /* look for another job to run */ } @@ -523,7 +518,7 @@ void *jobq_server(void *arg) db_close_database(jcr, jcr->db); jcr->db = NULL; } - Dmsg2(2300, "====== Termination job=%d use_cnt=%d\n", jcr->JobId, jcr->use_count); + Dmsg2(2300, "====== Termination job=%d use_cnt=%d\n", jcr->JobId, jcr->use_count()); jcr->SDJobStatus = 0; V(jq->mutex); /* release internal lock */ free_jcr(jcr); diff --git a/bacula/src/dird/migrate.c b/bacula/src/dird/migrate.c index b818adc37b..4f94b3bb64 100644 --- a/bacula/src/dird/migrate.c +++ b/bacula/src/dird/migrate.c @@ -305,13 +305,13 @@ static int jobid_handler(void *ctx, int num_fields, char **row) const char *sql_smallest_vol = "SELECT MediaId FROM Media,Pool WHERE" - " VolStatus in ('Full','Used') AND" + " VolStatus in ('Full','Used','Error') AND" " Media.PoolId=Pool.PoolId AND Pool.Name='%s'" " ORDER BY VolBytes ASC LIMIT 1"; const char *sql_oldest_vol = "SELECT MediaId FROM Media,Pool WHERE" - " VolStatus in ('Full','Used') AND" + " VolStatus in ('Full','Used','Error') AND" " Media.PoolId=Pool.PoolId AND Pool.Name='%s'" " ORDER BY LastWritten ASC LIMIT 1"; @@ -320,6 +320,40 @@ const char *sql_jobids_from_mediaid = " WHERE JobMedia.JobId=Job.JobId AND JobMedia.MediaId=%s" " ORDER by Job.StartTime"; +const char *sql_pool_bytes = + "SELECT SUM(VolBytes) FROM Media,Pool WHERE" + " VolStatus in ('Full','Used','Error','Append') AND" + " Media.PoolId=Pool.PoolId AND Pool.Name='%s'"; + +const char *sql_vol_bytes = + "SELECT MediaId FROM Media,Pool WHERE" + " VolStatus in ('Full','Used','Error') AND" + " Media.PoolId=Pool.PoolId AND Pool.Name='%s' AND" + " VolBytes<%s ORDER BY LastWritten ASC LIMIT 1"; + +const char *sql_client = + "SELECT DISTINCT Client.Name from Client,Pool,Media,Job,JobMedia " + " 
WHERE Media.PoolId=Pool.PoolId AND Pool.Name='%s' AND" + " JobMedia.JobId=Job.JobId AND Job.ClientId=Client.ClientId AND" + " Job.PoolId=Media.PoolId"; + +const char *sql_job = + "SELECT DISTINCT Job.Name from Client,Pool,Media,Job,JobMedia " + " WHERE Media.PoolId=Pool.PoolId AND Pool.Name='%s' AND" + " JobMedia.JobId=Job.JobId AND Job.ClientId=Client.ClientId AND" + " Job.PoolId=Media.PoolId"; + +const char *sql_ujobid = + "SELECT DISTINCT Job.Job from Client,Pool,Media,Job,JobMedia " + " WHERE Media.PoolId=Pool.PoolId AND Pool.Name='%s' AND" + " JobMedia.JobId=Job.JobId AND Job.ClientId=Client.ClientId AND" + " Job.PoolId=Media.PoolId"; + +const char *sql_vol = + "SELECT DISTINCT VolumeName FROM Media,Pool WHERE" + " VolStatus in ('Full','Used','Error') AND" + " Media.PoolId=Pool.PoolId AND Pool.Name='%s'"; + /* @@ -331,6 +365,9 @@ static bool get_job_to_migrate(JCR *jcr) char ed1[30]; POOL_MEM query(PM_MESSAGE); POOLMEM *JobIds = get_pool_memory(PM_MESSAGE); + JobId_t JobId; + int stat; + char *p; if (jcr->MigrateJobId != 0) { jcr->previous_jr.JobId = jcr->MigrateJobId; @@ -357,7 +394,6 @@ static bool get_job_to_migrate(JCR *jcr) goto bail_out; } Dmsg1(000, "Jobids=%s\n", JobIds); - goto ok_out; break; case MT_OLDEST_VOL: Mmsg(query, sql_oldest_vol, jcr->pool->hdr.name); @@ -380,9 +416,20 @@ static bool get_job_to_migrate(JCR *jcr) goto bail_out; } Dmsg1(000, "Jobids=%s\n", JobIds); - goto ok_out; break; case MT_POOL_OCCUPANCY: + Mmsg(query, sql_pool_bytes, jcr->pool->hdr.name); + JobIds = get_pool_memory(PM_MESSAGE); + JobIds[0] = 0; + if (!db_sql_query(jcr->db, query.c_str(), jobid_handler, (void *)JobIds)) { + Jmsg(jcr, M_FATAL, 0, + _("SQL to get Volume failed. 
ERR=%s\n"), db_strerror(jcr->db)); + goto bail_out; + } + if (JobIds[0] == 0) { + Jmsg(jcr, M_INFO, 0, _("No jobs found to migrate.\n")); + goto ok_out; + } break; case MT_POOL_TIME: break; @@ -415,7 +462,20 @@ static bool get_job_to_migrate(JCR *jcr) goto bail_out; } } - Dmsg1(100, "Last jobid=%d\n", jcr->previous_jr.JobId); + + p = JobIds; + stat = get_next_jobid_from_list(&p, &JobId); + Dmsg2(000, "get_next_jobid stat=%d JobId=%u\n", stat, JobId); + if (stat < 0) { + Jmsg(jcr, M_FATAL, 0, _("Invalid JobId found.\n")); + goto bail_out; + } else if (stat == 0) { + Jmsg(jcr, M_INFO, 0, _("No JobIds found to migrate.\n")); + goto ok_out; + } + + jcr->previous_jr.JobId = JobId; + Dmsg1(000, "Last jobid=%d\n", jcr->previous_jr.JobId); if (!db_get_job_record(jcr, jcr->db, &jcr->previous_jr)) { Jmsg(jcr, M_FATAL, 0, _("Could not get job record for JobId %s to migrate. ERR=%s"), diff --git a/bacula/src/dird/msgchan.c b/bacula/src/dird/msgchan.c index faaaceb3e7..42dd9446e5 100644 --- a/bacula/src/dird/msgchan.c +++ b/bacula/src/dird/msgchan.c @@ -33,6 +33,8 @@ #include "bacula.h" #include "dird.h" +static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; + /* Commands sent to Storage daemon */ static char jobcmd[] = "JobId=%d job=%s job_name=%s client_name=%s " "type=%d level=%d FileSet=%s NoAttr=%d SpoolAttr=%d FileSetMD5=%s " @@ -260,34 +262,30 @@ int start_storage_daemon_message_thread(JCR *jcr) int status; pthread_t thid; - P(jcr->mutex); - jcr->use_count++; /* mark in use by msg thread */ + jcr->inc_use_count(); /* mark in use by msg thread */ jcr->sd_msg_thread_done = false; jcr->SD_msg_chan = 0; - V(jcr->mutex); Dmsg0(100, "Start SD msg_thread.\n"); if ((status=pthread_create(&thid, NULL, msg_thread, (void *)jcr)) != 0) { berrno be; Jmsg1(jcr, M_ABORT, 0, _("Cannot create message thread: %s\n"), be.strerror(status)); } - Dmsg0(100, "SD msg_thread started.\n"); /* Wait for thread to start */ while (jcr->SD_msg_chan == 0) { bmicrosleep(0, 50); } + Dmsg1(100, "SD 
msg_thread started. use=%d\n", jcr->use_count()); return 1; } extern "C" void msg_thread_cleanup(void *arg) { JCR *jcr = (JCR *)arg; - Dmsg0(200, "End msg_thread\n"); db_end_transaction(jcr, jcr->db); /* terminate any open transaction */ - P(jcr->mutex); jcr->sd_msg_thread_done = true; - pthread_cond_broadcast(&jcr->term_wait); /* wakeup any waiting threads */ jcr->SD_msg_chan = 0; - V(jcr->mutex); + pthread_cond_broadcast(&jcr->term_wait); /* wakeup any waiting threads */ + Dmsg1(100, "=== End msg_thread. use=%d\n", jcr->use_count()); free_jcr(jcr); /* release jcr */ } @@ -314,9 +312,8 @@ extern "C" void *msg_thread(void *arg) /* Read the Storage daemon's output. */ Dmsg0(100, "Start msg_thread loop\n"); - while ((stat=bget_dirmsg(sd)) >= 0) { - int stat; - Dmsg1(3400, "msg); + while (!job_canceled(jcr) && bget_dirmsg(sd) >= 0) { + Dmsg1(400, "msg); if (sscanf(sd->msg, Job_start, Job) == 1) { continue; } @@ -327,6 +324,7 @@ extern "C" void *msg_thread(void *arg) jcr->SDJobBytes = JobBytes; break; } + Dmsg2(400, "end loop stat=%d use=%d\n", stat, jcr->use_count()); } if (is_bnet_error(sd)) { jcr->SDJobStatus = JS_ErrorTerminated; @@ -339,8 +337,6 @@ void wait_for_storage_daemon_termination(JCR *jcr) { int cancel_count = 0; /* Now wait for Storage daemon to terminate our message thread */ - set_jcr_job_status(jcr, JS_WaitSD); - P(jcr->mutex); while (!jcr->sd_msg_thread_done) { struct timeval tv; struct timezone tz; @@ -348,18 +344,25 @@ void wait_for_storage_daemon_termination(JCR *jcr) gettimeofday(&tv, &tz); timeout.tv_nsec = 0; - timeout.tv_sec = tv.tv_sec + 10; /* wait 10 seconds */ - Dmsg0(300, "I'm waiting for message thread termination.\n"); - pthread_cond_timedwait(&jcr->term_wait, &jcr->mutex, &timeout); + timeout.tv_sec = tv.tv_sec + 5; /* wait 5 seconds */ + Dmsg0(400, "I'm waiting for message thread termination.\n"); + P(mutex); + pthread_cond_timedwait(&jcr->term_wait, &mutex, &timeout); + V(mutex); if (job_canceled(jcr)) { + if (jcr->SD_msg_chan) { + 
jcr->store_bsock->timed_out = 1; + jcr->store_bsock->terminated = 1; + Dmsg2(400, "kill jobid=%d use=%d\n", (int)jcr->JobId, jcr->use_count()); + pthread_kill(jcr->SD_msg_chan, TIMEOUT_SIGNAL); + } cancel_count++; } /* Give SD 30 seconds to clean up after cancel */ - if (cancel_count == 3) { + if (cancel_count == 6) { break; } } - V(jcr->mutex); set_jcr_job_status(jcr, JS_Terminated); } @@ -377,7 +380,7 @@ extern "C" void *device_thread(void *arg) jcr = new_control_jcr("*DeviceInit*", JT_SYSTEM); for (i=0; i < MAX_TRIES; i++) { if (!connect_to_storage_daemon(jcr, 10, 30, 1)) { - Dmsg0(000, "Failed connecting to SD.\n"); + Dmsg0(900, "Failed connecting to SD.\n"); continue; } LockRes(); diff --git a/bacula/src/dird/next_vol.c b/bacula/src/dird/next_vol.c index f27e0cb25f..bc98a90b26 100644 --- a/bacula/src/dird/next_vol.c +++ b/bacula/src/dird/next_vol.c @@ -301,7 +301,8 @@ void check_if_volume_valid_or_recyclable(JCR *jcr, MEDIA_DBR *mr, const char **r } } else { *reason = _("but should be Append, Purged or Recycle (cannot automatically " - "recycle current volume, as it still contains unpruned data)"); + "recycle current volume, as it still contains unpruned data " + "or the Volume Retention time has not expired.)"); } } } diff --git a/bacula/src/dird/ua_output.c b/bacula/src/dird/ua_output.c index 36ae41ec82..dce6f8b876 100644 --- a/bacula/src/dird/ua_output.c +++ b/bacula/src/dird/ua_output.c @@ -465,15 +465,18 @@ static bool list_nextvol(UAContext *ua, int ndays) if (!complete_jcr_for_job(jcr, job, pool)) { return false; } - mr.PoolId = jcr->jr.PoolId; - if (run->storage) { - jcr->store = run->storage; - } memset(&pr, 0, sizeof(pr)); pr.PoolId = jcr->jr.PoolId; if (! 
db_get_pool_record(ua->jcr, ua->db, &pr)) { strcpy(pr.Name, "*UnknownPool*"); } + mr.PoolId = jcr->jr.PoolId; + if (run->storage) { + jcr->store = run->storage; + } else { + jcr->store = (STORE *)job->storage->first(); + } + mr.StorageId = jcr->store->StorageId; if (!find_next_volume_for_append(jcr, &mr, 1, false/*no create*/)) { bsendmsg(ua, _("Could not find next Volume for Job %s (%s, %s).\n"), job->hdr.name, pr.Name, level_to_str(run->level)); diff --git a/bacula/src/dird/ua_prune.c b/bacula/src/dird/ua_prune.c index 137462ddb0..7fdd575407 100644 --- a/bacula/src/dird/ua_prune.c +++ b/bacula/src/dird/ua_prune.c @@ -270,7 +270,7 @@ int prune_files(UAContext *ua, CLIENT *client) for (i=0; i < del.num_ids; i++) { Mmsg(query, del_File, edit_int64(del.JobId[i], ed1)); - Dmsg1(000, "Delete Files JobId=%s\n", ed1); + Dmsg1(200, "Delete Files JobId=%s\n", ed1); db_sql_query(ua->db, query, NULL, (void *)NULL); /* * Now mark Job as having files purged. This is necessary to @@ -280,7 +280,7 @@ int prune_files(UAContext *ua, CLIENT *client) */ Mmsg(query, upd_Purged, edit_int64(del.JobId[i], ed1)); db_sql_query(ua->db, query, NULL, (void *)NULL); - Dmsg1(000, "Update Purged sql=%s\n", query); + Dmsg1(200, "Update Purged sql=%s\n", query); } edit_uint64_with_commas(del.num_ids, ed1); bsendmsg(ua, _("Pruned Files from %s Jobs for client %s from catalog.\n"), diff --git a/bacula/src/dird/ua_restore.c b/bacula/src/dird/ua_restore.c index 2d29849083..1ba9dab5e6 100644 --- a/bacula/src/dird/ua_restore.c +++ b/bacula/src/dird/ua_restore.c @@ -81,7 +81,7 @@ int restore_cmd(UAContext *ua, const char *cmd) RESTORE_CTX rx; /* restore context */ JOB *job; int i; - POOLMEM *fname; + JCR *jcr = ua->jcr; memset(&rx, 0, sizeof(rx)); rx.path = get_pool_memory(PM_FNAME); @@ -177,22 +177,19 @@ int restore_cmd(UAContext *ua, const char *cmd) } /* Build run command */ - fname = get_pool_memory(PM_MESSAGE); - make_unique_restore_filename(ua, &fname); if (rx.where) { Mmsg(ua->cmd, "run 
job=\"%s\" client=\"%s\" storage=\"%s\" bootstrap=\"%s\"" " where=\"%s\" files=%d catalog=\"%s\"", job->hdr.name, rx.ClientName, rx.store?rx.store->hdr.name:"", - fname, rx.where, rx.selected_files, ua->catalog->hdr.name); + jcr->RestoreBootstrap, rx.where, rx.selected_files, ua->catalog->hdr.name); } else { Mmsg(ua->cmd, "run job=\"%s\" client=\"%s\" storage=\"%s\" bootstrap=\"%s\"" " files=%d catalog=\"%s\"", job->hdr.name, rx.ClientName, rx.store?rx.store->hdr.name:"", - fname, rx.selected_files, ua->catalog->hdr.name); + jcr->RestoreBootstrap, rx.selected_files, ua->catalog->hdr.name); } - free_pool_memory(fname); if (find_arg(ua, N_("yes")) > 0) { pm_strcat(ua->cmd, " yes"); /* pass it on to the run command */ } diff --git a/bacula/src/dird/ua_status.c b/bacula/src/dird/ua_status.c index 84138f31b0..aaf6a81b32 100644 --- a/bacula/src/dird/ua_status.c +++ b/bacula/src/dird/ua_status.c @@ -360,6 +360,7 @@ struct sched_pkt { int priority; time_t runtime; POOL *pool; + STORE *store; }; static void prt_runtime(UAContext *ua, sched_pkt *sp) @@ -380,6 +381,7 @@ static void prt_runtime(UAContext *ua, sched_pkt *sp) } if (ok) { mr.PoolId = jcr->jr.PoolId; + mr.StorageId = sp->store->StorageId; ok = find_next_volume_for_append(jcr, &mr, 1, false/*no create*/); } if (!ok) { @@ -435,6 +437,7 @@ static void list_scheduled_jobs(UAContext *ua) time_t runtime; RUN *run; JOB *job; + STORE* store; int level, num_jobs = 0; int priority; bool hdr_printed = false; @@ -469,6 +472,11 @@ static void list_scheduled_jobs(UAContext *ua) if (run->Priority) { priority = run->Priority; } + if (run->storage) { + store = run->storage; + } else { + store = (STORE *)job->storage->first(); + } if (!hdr_printed) { prt_runhdr(ua); hdr_printed = true; @@ -479,6 +487,7 @@ static void list_scheduled_jobs(UAContext *ua) sp->priority = priority; sp->runtime = runtime; sp->pool = run->pool; + sp->store = store; sched.binary_insert_multiple(sp, my_compare); num_jobs++; } diff --git 
a/bacula/src/filed/backup.c b/bacula/src/filed/backup.c index 3af54fcb8f..32c1e8535b 100644 --- a/bacula/src/filed/backup.c +++ b/bacula/src/filed/backup.c @@ -875,11 +875,11 @@ static bool encode_and_send_attributes(JCR *jcr, FF_PKT *ff_pkt, int &data_strea Dmsg3(300, "File %s\nattribs=%s\nattribsEx=%s\n", ff_pkt->fname, attribs, attribsEx); - P(jcr->mutex); + jcr->lock(); jcr->JobFiles++; /* increment number of files sent */ ff_pkt->FileIndex = jcr->JobFiles; /* return FileIndex */ pm_strcpy(jcr->last_fname, ff_pkt->fname); - V(jcr->mutex); + jcr->unlock(); /* * Send Attributes header to Storage daemon diff --git a/bacula/src/filed/job.c b/bacula/src/filed/job.c index fffe62bf20..579d645f1a 100644 --- a/bacula/src/filed/job.c +++ b/bacula/src/filed/job.c @@ -328,16 +328,11 @@ static int cancel_cmd(JCR *jcr) bnet_fsend(dir, _("2901 Job %s not found.\n"), Job); } else { if (cjcr->store_bsock) { - P(cjcr->mutex); cjcr->store_bsock->timed_out = 1; cjcr->store_bsock->terminated = 1; -/* - * #if !defined(HAVE_CYGWIN) && !defined(HAVE_WIN32) - */ #if !defined(HAVE_CYGWIN) pthread_kill(cjcr->my_thread_id, TIMEOUT_SIGNAL); #endif - V(cjcr->mutex); } set_jcr_job_status(cjcr, JS_Canceled); free_jcr(cjcr); @@ -397,7 +392,7 @@ static int job_cmd(JCR *jcr) { BSOCK *dir = jcr->dir_bsock; POOLMEM *sd_auth_key; - + sd_auth_key = get_memory(dir->msglen); if (sscanf(dir->msg, jobcmd, &jcr->JobId, jcr->Job, &jcr->VolSessionId, &jcr->VolSessionTime, diff --git a/bacula/src/filed/restore.c b/bacula/src/filed/restore.c index 4aed75c9b6..274b07148f 100644 --- a/bacula/src/filed/restore.c +++ b/bacula/src/filed/restore.c @@ -289,10 +289,10 @@ void do_restore(JCR *jcr) extract = true; /* FALLTHROUGH */ case CF_CREATED: /* File created, but there is no content */ - P(jcr->mutex); + jcr->lock(); pm_strcpy(jcr->last_fname, attr->ofname); - V(jcr->mutex); jcr->JobFiles++; + jcr->unlock(); fileAddr = 0; print_ls_output(jcr, attr); #ifdef HAVE_DARWIN_OS diff --git a/bacula/src/filed/status.c 
b/bacula/src/filed/status.c index ede4e6d987..4962da52e5 100755 --- a/bacula/src/filed/status.c +++ b/bacula/src/filed/status.c @@ -172,9 +172,9 @@ static void do_status(void sendit(const char *msg, int len, void *sarg), void *a edit_uint64_with_commas(njcr->num_files_examined, b1)); sendit(msg, len, arg); if (njcr->JobFiles > 0) { - P(njcr->mutex); + njcr->lock(); len = Mmsg(msg, _(" Processing file: %s\n"), njcr->last_fname); - V(njcr->mutex); + njcr->unlock(); sendit(msg, len, arg); } diff --git a/bacula/src/filed/verify.c b/bacula/src/filed/verify.c index 8312b7006f..c882a8a649 100644 --- a/bacula/src/filed/verify.c +++ b/bacula/src/filed/verify.c @@ -1,5 +1,5 @@ /* - * Bacula File Daemon verify.c Verify files. + * Bacula File Daemon verify.c Verify files. * * Kern Sibbald, October MM * @@ -43,7 +43,7 @@ void do_verify(JCR *jcr) jcr->buf_size = DEFAULT_NETWORK_BUFFER_SIZE; if ((jcr->big_buf = (char *) malloc(jcr->buf_size)) == NULL) { Jmsg1(jcr, M_ABORT, 0, _("Cannot malloc %d network read buffer\n"), - DEFAULT_NETWORK_BUFFER_SIZE); + DEFAULT_NETWORK_BUFFER_SIZE); } set_find_options((FF_PKT *)jcr->ff, jcr->incremental, jcr->mtime); Dmsg0(10, "Start find files\n"); @@ -78,10 +78,10 @@ static int verify_file(FF_PKT *ff_pkt, void *pkt, bool top_level) } dir = jcr->dir_bsock; - jcr->num_files_examined++; /* bump total file count */ + jcr->num_files_examined++; /* bump total file count */ switch (ff_pkt->type) { - case FT_LNKSAVED: /* Hard linked, file already saved */ + case FT_LNKSAVED: /* Hard linked, file already saved */ Dmsg2(30, "FT_LNKSAVED saving: %s => %s\n", ff_pkt->fname, ff_pkt->link); break; case FT_REGE: @@ -94,7 +94,7 @@ static int verify_file(FF_PKT *ff_pkt, void *pkt, bool top_level) Dmsg2(30, "FT_LNK saving: %s -> %s\n", ff_pkt->fname, ff_pkt->link); break; case FT_DIRBEGIN: - return 1; /* ignored */ + return 1; /* ignored */ case FT_DIREND: Dmsg1(30, "FT_DIR saving: %s\n", ff_pkt->fname); break; @@ -158,19 +158,19 @@ static int verify_file(FF_PKT 
*ff_pkt, void *pkt, bool top_level) encode_stat(attribs, ff_pkt, 0); encode_attribsEx(jcr, attribsEx, ff_pkt); - P(jcr->mutex); - jcr->JobFiles++; /* increment number of files sent */ + jcr->lock(); + jcr->JobFiles++; /* increment number of files sent */ pm_strcpy(jcr->last_fname, ff_pkt->fname); - V(jcr->mutex); + jcr->unlock(); /* * Send file attributes to Director - * File_index - * Stream - * Verify Options - * Filename (full path) - * Encoded attributes - * Link name (if type==FT_LNK) + * File_index + * Stream + * Verify Options + * Filename (full path) + * Encoded attributes + * Link name (if type==FT_LNK) * For a directory, link is the same as fname, but with trailing * slash. For a linked file, link is the link. */ @@ -178,17 +178,17 @@ static int verify_file(FF_PKT *ff_pkt, void *pkt, bool top_level) Dmsg2(400, "send ATTR inx=%d fname=%s\n", jcr->JobFiles, ff_pkt->fname); if (ff_pkt->type == FT_LNK || ff_pkt->type == FT_LNKSAVED) { stat = bnet_fsend(dir, "%d %d %s %s%c%s%c%s%c", jcr->JobFiles, - STREAM_UNIX_ATTRIBUTES, ff_pkt->VerifyOpts, ff_pkt->fname, - 0, attribs, 0, ff_pkt->link, 0); + STREAM_UNIX_ATTRIBUTES, ff_pkt->VerifyOpts, ff_pkt->fname, + 0, attribs, 0, ff_pkt->link, 0); } else if (ff_pkt->type == FT_DIREND) { - /* Here link is the canonical filename (i.e. with trailing slash) */ + /* Here link is the canonical filename (i.e. 
with trailing slash) */ stat = bnet_fsend(dir,"%d %d %s %s%c%s%c%c", jcr->JobFiles, - STREAM_UNIX_ATTRIBUTES, ff_pkt->VerifyOpts, ff_pkt->link, - 0, attribs, 0, 0); + STREAM_UNIX_ATTRIBUTES, ff_pkt->VerifyOpts, ff_pkt->link, + 0, attribs, 0, 0); } else { stat = bnet_fsend(dir,"%d %d %s %s%c%s%c%c", jcr->JobFiles, - STREAM_UNIX_ATTRIBUTES, ff_pkt->VerifyOpts, ff_pkt->fname, - 0, attribs, 0, 0); + STREAM_UNIX_ATTRIBUTES, ff_pkt->VerifyOpts, ff_pkt->fname, + 0, attribs, 0, 0); } Dmsg2(20, "bfiled>bdird: attribs len=%d: msg=%s\n", dir->msglen, dir->msg); if (!stat) { @@ -201,64 +201,64 @@ static int verify_file(FF_PKT *ff_pkt, void *pkt, bool top_level) * First we initialise, then we read files, other streams and Finder Info. */ if (ff_pkt->type != FT_LNKSAVED && (S_ISREG(ff_pkt->statp.st_mode) && - ff_pkt->flags & (FO_MD5|FO_SHA1|FO_SHA256|FO_SHA512))) { + ff_pkt->flags & (FO_MD5|FO_SHA1|FO_SHA256|FO_SHA512))) { /* * Create our digest context. If this fails, the digest will be set to NULL * and not used. */ if (ff_pkt->flags & FO_MD5) { - digest = crypto_digest_new(CRYPTO_DIGEST_MD5); - digest_stream = STREAM_MD5_DIGEST; + digest = crypto_digest_new(CRYPTO_DIGEST_MD5); + digest_stream = STREAM_MD5_DIGEST; } else if (ff_pkt->flags & FO_SHA1) { - digest = crypto_digest_new(CRYPTO_DIGEST_SHA1); - digest_stream = STREAM_SHA1_DIGEST; + digest = crypto_digest_new(CRYPTO_DIGEST_SHA1); + digest_stream = STREAM_SHA1_DIGEST; } else if (ff_pkt->flags & FO_SHA256) { - digest = crypto_digest_new(CRYPTO_DIGEST_SHA256); - digest_stream = STREAM_SHA256_DIGEST; + digest = crypto_digest_new(CRYPTO_DIGEST_SHA256); + digest_stream = STREAM_SHA256_DIGEST; } else if (ff_pkt->flags & FO_SHA512) { - digest = crypto_digest_new(CRYPTO_DIGEST_SHA512); - digest_stream = STREAM_SHA512_DIGEST; + digest = crypto_digest_new(CRYPTO_DIGEST_SHA512); + digest_stream = STREAM_SHA512_DIGEST; } /* Did digest initialization fail? 
*/ if (digest_stream != STREAM_NONE && digest == NULL) { Jmsg(jcr, M_WARNING, 0, _("%s digest initialization failed\n"), - stream_to_ascii(digest_stream)); + stream_to_ascii(digest_stream)); } /* compute MD5 or SHA1 hash */ if (digest) { - char md[CRYPTO_DIGEST_MAX_SIZE]; - size_t size; - - size = sizeof(md); - - if (digest_file(jcr, ff_pkt, digest) != 0) { - jcr->Errors++; - return 1; - } - - if (crypto_digest_finalize(digest, &md, &size)) { - char *digest_buf; - const char *digest_name; - - digest_buf = (char *) malloc(BASE64_SIZE(size)); - digest_name = crypto_digest_name(digest); - - bin_to_base64(digest_buf, (char *) md, size); + char md[CRYPTO_DIGEST_MAX_SIZE]; + size_t size; + + size = sizeof(md); + + if (digest_file(jcr, ff_pkt, digest) != 0) { + jcr->Errors++; + return 1; + } + + if (crypto_digest_finalize(digest, &md, &size)) { + char *digest_buf; + const char *digest_name; + + digest_buf = (char *) malloc(BASE64_SIZE(size)); + digest_name = crypto_digest_name(digest); + + bin_to_base64(digest_buf, (char *) md, size); Dmsg3(400, "send inx=%d %s=%s\n", jcr->JobFiles, digest_name, digest_buf); bnet_fsend(dir, "%d %d %s *%s-%d*", jcr->JobFiles, digest_stream, digest_buf, - digest_name, jcr->JobFiles); + digest_name, jcr->JobFiles); Dmsg3(20, "bfiled>bdird: %s len=%d: msg=%s\n", digest_name, - dir->msglen, dir->msg); + dir->msglen, dir->msg); - free(digest_buf); - } + free(digest_buf); + } - crypto_digest_free(digest); + crypto_digest_free(digest); } } @@ -276,14 +276,14 @@ int digest_file(JCR *jcr, FF_PKT *ff_pkt, DIGEST *digest) binit(&bfd); if (ff_pkt->statp.st_size > 0 || ff_pkt->type == FT_RAW - || ff_pkt->type == FT_FIFO) { + || ff_pkt->type == FT_FIFO) { if ((bopen(&bfd, ff_pkt->fname, O_RDONLY | O_BINARY, 0)) < 0) { - ff_pkt->ff_errno = errno; - berrno be; - be.set_errno(bfd.berrno); + ff_pkt->ff_errno = errno; + berrno be; + be.set_errno(bfd.berrno); Jmsg(jcr, M_NOTSAVED, 1, _(" Cannot open %s: ERR=%s.\n"), - ff_pkt->fname, be.strerror()); - return 
1; + ff_pkt->fname, be.strerror()); + return 1; } read_digest(&bfd, digest, jcr); bclose(&bfd); @@ -293,14 +293,14 @@ int digest_file(JCR *jcr, FF_PKT *ff_pkt, DIGEST *digest) /* Open resource fork if necessary */ if (ff_pkt->flags & FO_HFSPLUS && ff_pkt->hfsinfo.rsrclength > 0) { if (bopen_rsrc(&bfd, ff_pkt->fname, O_RDONLY | O_BINARY, 0) < 0) { - ff_pkt->ff_errno = errno; - berrno be; + ff_pkt->ff_errno = errno; + berrno be; Jmsg(jcr, M_NOTSAVED, -1, _(" Cannot open resource fork for %s: ERR=%s.\n"), - ff_pkt->fname, be.strerror()); - if (is_bopen(&ff_pkt->bfd)) { - bclose(&ff_pkt->bfd); - } - return 1; + ff_pkt->fname, be.strerror()); + if (is_bopen(&ff_pkt->bfd)) { + bclose(&ff_pkt->bfd); + } + return 1; } read_digest(&bfd, digest, jcr); bclose(&bfd); @@ -332,7 +332,7 @@ int read_digest(BFILE *bfd, DIGEST *digest, JCR *jcr) berrno be; be.set_errno(bfd->berrno); Jmsg(jcr, M_ERROR, 1, _("Error reading file %s: ERR=%s\n"), - jcr->last_fname, be.strerror()); + jcr->last_fname, be.strerror()); jcr->Errors++; return -1; } diff --git a/bacula/src/filed/verify_vol.c b/bacula/src/filed/verify_vol.c index abcf922500..b468584903 100644 --- a/bacula/src/filed/verify_vol.c +++ b/bacula/src/filed/verify_vol.c @@ -1,5 +1,5 @@ /* - * Bacula File Daemon verify-vol.c Verify files on a Volume + * Bacula File Daemon verify-vol.c Verify files on a Volume * versus attributes in Catalog * * Kern Sibbald, July MMII @@ -43,8 +43,8 @@ static char rec_header[] = "rechdr %ld %ld %ld %ld %ld"; void do_verify_volume(JCR *jcr) { BSOCK *sd, *dir; - POOLMEM *fname; /* original file name */ - POOLMEM *lname; /* link name */ + POOLMEM *fname; /* original file name */ + POOLMEM *lname; /* link name */ int32_t stream; uint32_t size; uint32_t VolSessionId, VolSessionTime, file_index; @@ -68,7 +68,7 @@ void do_verify_volume(JCR *jcr) if (client) { buf_size = client->max_network_buffer_size; } else { - buf_size = 0; /* use default */ + buf_size = 0; /* use default */ } if (!bnet_set_buffer_size(sd, 
buf_size, BNET_SETBUF_WRITE)) { set_jcr_job_status(jcr, JS_FatalError); @@ -87,9 +87,9 @@ void do_verify_volume(JCR *jcr) * First we expect a Stream Record Header */ if (sscanf(sd->msg, rec_header, &VolSessionId, &VolSessionTime, &file_index, - &stream, &size) != 5) { + &stream, &size) != 5) { Jmsg1(jcr, M_FATAL, 0, _("Record header scan error: %s\n"), sd->msg); - goto bail_out; + goto bail_out; } Dmsg2(30, "Got hdr: FilInx=%d Stream=%d.\n", file_index, stream); @@ -98,11 +98,11 @@ void do_verify_volume(JCR *jcr) */ if (bget_msg(sd) < 0) { Jmsg1(jcr, M_FATAL, 0, _("Data record error. ERR=%s\n"), bnet_strerror(sd)); - goto bail_out; + goto bail_out; } if (size != ((uint32_t)sd->msglen)) { Jmsg2(jcr, M_FATAL, 0, _("Actual data size %d not same as header %d\n"), sd->msglen, size); - goto bail_out; + goto bail_out; } Dmsg1(30, "Got stream data, len=%d\n", sd->msglen); @@ -110,98 +110,98 @@ void do_verify_volume(JCR *jcr) switch (stream) { case STREAM_UNIX_ATTRIBUTES: case STREAM_UNIX_ATTRIBUTES_EX: - char *ap, *lp, *fp; + char *ap, *lp, *fp; Dmsg0(400, "Stream=Unix Attributes.\n"); - if ((int)sizeof_pool_memory(fname) < sd->msglen) { - fname = realloc_pool_memory(fname, sd->msglen + 1); - } + if ((int)sizeof_pool_memory(fname) < sd->msglen) { + fname = realloc_pool_memory(fname, sd->msglen + 1); + } - if ((int)sizeof_pool_memory(lname) < sd->msglen) { - lname = realloc_pool_memory(lname, sd->msglen + 1); - } - *fname = 0; - *lname = 0; + if ((int)sizeof_pool_memory(lname) < sd->msglen) { + lname = realloc_pool_memory(lname, sd->msglen + 1); + } + *fname = 0; + *lname = 0; - /* - * An Attributes record consists of: - * File_index - * Type (FT_types) - * Filename - * Attributes - * Link name (if file linked i.e. FT_LNK) - * Extended Attributes (if Win32) - */ + /* + * An Attributes record consists of: + * File_index + * Type (FT_types) + * Filename + * Attributes + * Link name (if file linked i.e. 
FT_LNK) + * Extended Attributes (if Win32) + */ if (sscanf(sd->msg, "%d %d", &record_file_index, &type) != 2) { Jmsg(jcr, M_FATAL, 0, _("Error scanning record header: %s\n"), sd->msg); Dmsg0(0, "\nError scanning header\n"); - goto bail_out; - } + goto bail_out; + } Dmsg2(30, "Got Attr: FilInx=%d type=%d\n", record_file_index, type); - if (record_file_index != file_index) { + if (record_file_index != file_index) { Jmsg(jcr, M_FATAL, 0, _("Record header file index %ld not equal record index %ld\n"), - file_index, record_file_index); + file_index, record_file_index); Dmsg0(0, "File index error\n"); - goto bail_out; - } - ap = sd->msg; + goto bail_out; + } + ap = sd->msg; while (*ap++ != ' ') /* skip record file index */ - ; + ; while (*ap++ != ' ') /* skip type */ - ; - /* Save filename and position to attributes */ - fp = fname; - while (*ap != 0) { - *fp++ = *ap++; /* copy filename to fname */ - } - *fp = *ap++; /* terminate filename & point to attribs */ + ; + /* Save filename and position to attributes */ + fp = fname; + while (*ap != 0) { + *fp++ = *ap++; /* copy filename to fname */ + } + *fp = *ap++; /* terminate filename & point to attribs */ Dmsg1(200, "Attr=%s\n", ap); - /* Skip to Link name */ - if (type == FT_LNK || type == FT_LNKSAVED) { - lp = ap; - while (*lp++ != 0) { - ; - } + /* Skip to Link name */ + if (type == FT_LNK || type == FT_LNKSAVED) { + lp = ap; + while (*lp++ != 0) { + ; + } pm_strcat(lname, lp); /* "save" link name */ - } else { - *lname = 0; - } - P(jcr->mutex); - jcr->JobFiles++; - jcr->num_files_examined++; - pm_strcpy(jcr->last_fname, fname); /* last file examined */ - V(jcr->mutex); + } else { + *lname = 0; + } + jcr->lock(); + jcr->JobFiles++; + jcr->num_files_examined++; + pm_strcpy(jcr->last_fname, fname); /* last file examined */ + jcr->unlock(); - /* - * Send file attributes to Director - * File_index - * Stream - * Verify Options - * Filename (full path) - * Encoded attributes - * Link name (if type==FT_LNK) - * For a 
directory, link is the same as fname, but with trailing - * slash. For a linked file, link is the link. - */ - /* Send file attributes to Director */ + /* + * Send file attributes to Director + * File_index + * Stream + * Verify Options + * Filename (full path) + * Encoded attributes + * Link name (if type==FT_LNK) + * For a directory, link is the same as fname, but with trailing + * slash. For a linked file, link is the link. + */ + /* Send file attributes to Director */ Dmsg2(200, "send ATTR inx=%d fname=%s\n", jcr->JobFiles, fname); - if (type == FT_LNK || type == FT_LNKSAVED) { + if (type == FT_LNK || type == FT_LNKSAVED) { stat = bnet_fsend(dir, "%d %d %s %s%c%s%c%s%c", jcr->JobFiles, STREAM_UNIX_ATTRIBUTES, "pinsug5", fname, - 0, ap, 0, lname, 0); - } else { + 0, ap, 0, lname, 0); + } else { stat = bnet_fsend(dir,"%d %d %s %s%c%s%c%c", jcr->JobFiles, STREAM_UNIX_ATTRIBUTES, "pinsug5", fname, - 0, ap, 0, 0); - } + 0, ap, 0, 0); + } Dmsg2(200, "bfiled>bdird: attribs len=%d: msg=%s\n", dir->msglen, dir->msg); - if (!stat) { + if (!stat) { Jmsg(jcr, M_FATAL, 0, _("Network error in send to Director: ERR=%s\n"), bnet_strerror(dir)); - goto bail_out; - } - break; + goto bail_out; + } + break; /* Data streams to ignore */ case STREAM_ENCRYPTED_SESSION_DATA: @@ -213,8 +213,8 @@ void do_verify_volume(JCR *jcr) case STREAM_SPARSE_GZIP_DATA: case STREAM_SIGNED_DIGEST: - /* Do nothing */ - break; + /* Do nothing */ + break; case STREAM_MD5_DIGEST: bin_to_base64(digest, (char *)sd->msg, CRYPTO_DIGEST_MD5_SIZE); @@ -250,7 +250,7 @@ void do_verify_volume(JCR *jcr) default: Pmsg2(0, "None of above!!! 
stream=%d data=%s\n", stream,sd->msg); - break; + break; } /* end switch */ } /* end while bnet_get */ set_jcr_job_status(jcr, JS_Terminated); diff --git a/bacula/src/jcr.h b/bacula/src/jcr.h index 096661c7d6..86b2426ca6 100644 --- a/bacula/src/jcr.h +++ b/bacula/src/jcr.h @@ -109,15 +109,22 @@ typedef void (JCR_free_HANDLER)(JCR *jcr); /* Job Control Record (JCR) */ class JCR { +private: + pthread_mutex_t mutex; /* jcr mutex */ + volatile int _use_count; /* use count */ public: - void inc_use_count(void) {P(mutex); use_count++; V(mutex); }; - void dec_use_count(void) {P(mutex); use_count--; V(mutex); }; + void inc_use_count(void) {P(mutex); _use_count++; V(mutex); }; + void dec_use_count(void) {P(mutex); _use_count--; V(mutex); }; + int use_count() { return _use_count; }; + void init_mutex(void) {pthread_mutex_init(&mutex, NULL); }; + void destroy_mutex(void) {pthread_mutex_destroy(&mutex); }; + void lock() {P(mutex); }; + void unlock() {V(mutex); }; + bool is_job_canceled() {return job_canceled(this); }; /* Global part of JCR common to all daemons */ dlink link; /* JCR chain link */ - volatile int use_count; /* use count */ pthread_t my_thread_id; /* id of thread controlling jcr */ - pthread_mutex_t mutex; /* jcr mutex */ BSOCK *dir_bsock; /* Director bsock or NULL if we are him */ BSOCK *store_bsock; /* Storage connection socket */ BSOCK *file_bsock; /* File daemon connection socket */ @@ -169,7 +176,7 @@ public: /* This should be empty in the library */ #ifdef DIRECTOR_DAEMON - /* Director Daemon specific part of JCR */ + /* Director Daemon specific data part of JCR */ pthread_t SD_msg_chan; /* Message channel thread id */ pthread_cond_t term_wait; /* Wait for job termination */ workq_ele_t *work_item; /* Work queue item if scheduled */ diff --git a/bacula/src/lib/jcr.c b/bacula/src/lib/jcr.c index 6c7d6daa0d..0c6f6af616 100755 --- a/bacula/src/lib/jcr.c +++ b/bacula/src/lib/jcr.c @@ -229,8 +229,8 @@ JCR *new_jcr(int size, JCR_free_HANDLER *daemon_free_jcr) 
jcr->job_end_push.init(1, false); jcr->sched_time = time(NULL); jcr->daemon_free_jcr = daemon_free_jcr; /* plug daemon free routine */ - jcr->use_count = 1; - pthread_mutex_init(&(jcr->mutex), NULL); + jcr->inc_use_count(); + jcr->init_mutex(); jcr->JobStatus = JS_Created; /* ready to run */ jcr->VolumeName = get_pool_memory(PM_FNAME); jcr->VolumeName[0] = 0; @@ -329,7 +329,7 @@ static void free_common_jcr(JCR *jcr) default: break; } - pthread_mutex_destroy(&jcr->mutex); + jcr->destroy_mutex(); delete jcr->msg_queue; close_msg(jcr); /* close messages for this job */ @@ -396,14 +396,14 @@ void free_jcr(JCR *jcr) dequeue_messages(jcr); lock_jcr_chain(); jcr->dec_use_count(); /* decrement use count */ - if (jcr->use_count < 0) { + if (jcr->use_count() < 0) { Emsg2(M_ERROR, 0, _("JCR use_count=%d JobId=%d\n"), - jcr->use_count, jcr->JobId); + jcr->use_count(), jcr->JobId); } - Dmsg3(3400, "Dec free_jcr 0x%x use_count=%d jobid=%d\n", jcr, jcr->use_count, jcr->JobId); - if (jcr->use_count > 0) { /* if in use */ + Dmsg3(3400, "Dec free_jcr 0x%x use_count=%d jobid=%d\n", jcr, jcr->use_count(), jcr->JobId); + if (jcr->use_count() > 0) { /* if in use */ unlock_jcr_chain(); - Dmsg3(3400, "free_jcr 0x%x job=%d use_count=%d\n", jcr, jcr->JobId, jcr->use_count); + Dmsg3(3400, "free_jcr 0x%x job=%d use_count=%d\n", jcr, jcr->JobId, jcr->use_count()); return; } @@ -432,15 +432,14 @@ JCR *get_jcr_by_id(uint32_t JobId) { JCR *jcr; - lock_jcr_chain(); /* lock chain */ - foreach_dlist(jcr, jcrs) { + foreach_jcr(jcr) { if (jcr->JobId == JobId) { jcr->inc_use_count(); - Dmsg2(3400, "Inc get_jcr 0x%x use_count=%d\n", jcr, jcr->use_count); + Dmsg2(3400, "Inc get_jcr 0x%x use_count=%d\n", jcr, jcr->use_count()); break; } } - unlock_jcr_chain(); + endeach_jcr(jcr); return jcr; } @@ -453,16 +452,15 @@ JCR *get_jcr_by_session(uint32_t SessionId, uint32_t SessionTime) { JCR *jcr; - lock_jcr_chain(); - foreach_dlist(jcr, jcrs) { + foreach_jcr(jcr) { if (jcr->VolSessionId == SessionId && 
jcr->VolSessionTime == SessionTime) { jcr->inc_use_count(); - Dmsg2(3400, "Inc get_jcr 0x%x use_count=%d\n", jcr, jcr->use_count); + Dmsg2(3400, "Inc get_jcr 0x%x use_count=%d\n", jcr, jcr->use_count()); break; } } - unlock_jcr_chain(); + endeach_jcr(jcr); return jcr; } @@ -482,16 +480,15 @@ JCR *get_jcr_by_partial_name(char *Job) if (!Job) { return NULL; } - lock_jcr_chain(); len = strlen(Job); - foreach_dlist(jcr, jcrs) { + foreach_jcr(jcr) { if (strncmp(Job, jcr->Job, len) == 0) { jcr->inc_use_count(); - Dmsg2(3400, "Inc get_jcr 0x%x use_count=%d\n", jcr, jcr->use_count); + Dmsg2(3400, "Inc get_jcr 0x%x use_count=%d\n", jcr, jcr->use_count()); break; } } - unlock_jcr_chain(); + endeach_jcr(jcr); return jcr; } @@ -509,15 +506,14 @@ JCR *get_jcr_by_full_name(char *Job) if (!Job) { return NULL; } - lock_jcr_chain(); - foreach_dlist(jcr, jcrs) { + foreach_jcr(jcr) { if (strcmp(jcr->Job, Job) == 0) { jcr->inc_use_count(); - Dmsg2(3400, "Inc get_jcr 0x%x use_count=%d\n", jcr, jcr->use_count); + Dmsg2(3400, "Inc get_jcr 0x%x use_count=%d\n", jcr, jcr->use_count()); break; } } - unlock_jcr_chain(); + endeach_jcr(jcr); return jcr; } @@ -600,7 +596,7 @@ JCR *jcr_walk_start() if (jcr) { jcr->inc_use_count(); Dmsg3(3400, "Inc jcr_walk_start 0x%x job=%d use_count=%d\n", jcr, - jcr->JobId, jcr->use_count); + jcr->JobId, jcr->use_count()); } unlock_jcr_chain(); return jcr; @@ -618,7 +614,7 @@ JCR *jcr_walk_next(JCR *prev_jcr) if (jcr) { jcr->inc_use_count(); Dmsg3(3400, "Inc jcr_walk_next 0x%x job=%d use_count=%d\n", jcr, - jcr->JobId, jcr->use_count); + jcr->JobId, jcr->use_count()); } unlock_jcr_chain(); if (prev_jcr) { diff --git a/bacula/src/lib/rwlock.c b/bacula/src/lib/rwlock.c index d66a5cc983..fc70892cfd 100644 --- a/bacula/src/lib/rwlock.c +++ b/bacula/src/lib/rwlock.c @@ -13,22 +13,17 @@ * */ /* - Copyright (C) 2000-2004 Kern Sibbald and John Walker + Copyright (C) 2001-2006 Kern Sibbald This program is free software; you can redistribute it and/or - modify it under 
the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of - the License, or (at your option) any later version. + modify it under the terms of the GNU General Public License + version 2 as amended with additional clauses defined in the + file LICENSE in the main source directory. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public - License along with this program; if not, write to the Free - Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, - MA 02111-1307, USA. + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + the file LICENSE for additional details. */ diff --git a/bacula/src/lib/rwlock.h b/bacula/src/lib/rwlock.h index 8abd1b8311..178410945c 100644 --- a/bacula/src/lib/rwlock.h +++ b/bacula/src/lib/rwlock.h @@ -11,22 +11,17 @@ * */ /* - Copyright (C) 2000-2003 Kern Sibbald and John Walker + Copyright (C) 2001-2006 Kern Sibbald This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of - the License, or (at your option) any later version. + modify it under the terms of the GNU General Public License + version 2 as amended with additional clauses defined in the + file LICENSE in the main source directory. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. 
- - You should have received a copy of the GNU General Public - License along with this program; if not, write to the Free - Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, - MA 02111-1307, USA. + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + the file LICENSE for additional details. */ diff --git a/bacula/src/stored/acquire.c b/bacula/src/stored/acquire.c index 4d32968cfc..9528f786c5 100644 --- a/bacula/src/stored/acquire.c +++ b/bacula/src/stored/acquire.c @@ -48,6 +48,7 @@ bool acquire_device_for_read(DCR *dcr) int i; int vol_label_status; + Dmsg1(50, "jcr->dcr=%p\n", jcr->dcr); dev->block(BST_DOING_ACQUIRE); if (dev->num_writers > 0) { @@ -84,6 +85,9 @@ bool acquire_device_for_read(DCR *dcr) RCTX rctx; DIRSTORE *store; int stat; + DCR *dcr_save = jcr->dcr; + + jcr->dcr = NULL; memset(&rctx, 0, sizeof(RCTX)); rctx.jcr = jcr; jcr->reserve_msgs = New(alist(10, not_owned_by_alist)); @@ -132,10 +136,13 @@ bool acquire_device_for_read(DCR *dcr) /* error */ Jmsg1(jcr, M_FATAL, 0, _("No suitable device found to read Volume \"%s\"\n"), vol->VolumeName); + jcr->dcr = dcr_save; goto get_out; } + jcr->dcr = dcr_save; } + init_device_wait_timers(dcr); tape_previously_mounted = dev->can_read() || dev->can_append() || @@ -255,6 +262,7 @@ get_out: } V(dev->mutex); dev->unblock(); + Dmsg1(50, "jcr->dcr=%p\n", jcr->dcr); return ok; } diff --git a/bacula/src/stored/askdir.c b/bacula/src/stored/askdir.c index 16557abd98..b7fc61a113 100644 --- a/bacula/src/stored/askdir.c +++ b/bacula/src/stored/askdir.c @@ -162,7 +162,7 @@ static bool do_get_volume_info(DCR *dcr) return false; } memset(&vol, 0, sizeof(vol)); - Dmsg1(100, "msg); + Dmsg1(110, "msg); n = sscanf(dir->msg, OK_media, vol.VolCatName, &vol.VolCatJobs, &vol.VolCatFiles, &vol.VolCatBlocks, &vol.VolCatBytes, @@ -174,7 +174,7 @@ static bool do_get_volume_info(DCR *dcr) &vol.EndFile, &vol.EndBlock, &vol.VolCatParts, &vol.LabelType); if (n != 21) { - Dmsg2(100, "Bad response from Dir fields=%d: 
%s", n, dir->msg); + Dmsg2(110, "Bad response from Dir fields=%d: %s", n, dir->msg); Mmsg(jcr->errmsg, _("Error getting Volume info: %s"), dir->msg); return false; } @@ -228,10 +228,6 @@ bool dir_find_next_appendable_volume(DCR *dcr) JCR *jcr = dcr->jcr; BSOCK *dir = jcr->dir_bsock; bool found = false; - /* This mutex should keep different devices from getting the - * same Volume. - */ - static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; Dmsg0(200, "dir_find_next_appendable_volume\n"); /* @@ -239,7 +235,7 @@ bool dir_find_next_appendable_volume(DCR *dcr) * the most available could already be mounted on another * drive, so we continue looking for a not in use Volume. */ - P(mutex); + lock_reservations(); for (int vol_index=1; vol_index < 20; vol_index++) { bash_spaces(dcr->media_type); bash_spaces(dcr->pool_name); @@ -257,7 +253,8 @@ bool dir_find_next_appendable_volume(DCR *dcr) continue; } } else { - Dmsg0(200, "No volume info, return false\n"); + Dmsg2(100, "No vol. index %d return false. 
dev=%s\n", vol_index, + dcr->dev->print_name()); found = false; break; } @@ -265,11 +262,11 @@ bool dir_find_next_appendable_volume(DCR *dcr) if (found) { Dmsg0(400, "dir_find_next_appendable_volume return true\n"); new_volume(dcr, dcr->VolumeName); /* reserve volume */ - V(mutex); + unlock_reservations(); return true; } dcr->VolumeName[0] = 0; - V(mutex); + unlock_reservations(); return false; } diff --git a/bacula/src/stored/autochanger.c b/bacula/src/stored/autochanger.c index e135852474..9e2f01025a 100644 --- a/bacula/src/stored/autochanger.c +++ b/bacula/src/stored/autochanger.c @@ -148,7 +148,7 @@ int autoload_device(DCR *dcr, int writing, BSOCK *dir) * Load the desired cassette */ lock_changer(dcr); - Dmsg1(400, "Doing changer load slot %d\n", slot); + Dmsg1(100, "Doing changer load slot %d\n", slot); Jmsg(jcr, M_INFO, 0, _("3304 Issuing autochanger \"load slot %d, drive %d\" command.\n"), slot, drive); @@ -161,21 +161,24 @@ int autoload_device(DCR *dcr, int writing, BSOCK *dir) if (status == 0) { Jmsg(jcr, M_INFO, 0, _("3305 Autochanger \"load slot %d, drive %d\", status is OK.\n"), slot, drive); + Dmsg2(100, "load slot %d, drive %d, status is OK.\n", slot, drive); dev->Slot = slot; /* set currently loaded slot */ } else { - berrno be; - be.set_errno(status); + berrno be; + be.set_errno(status); + Dmsg3(100, "load slot %d, drive %d, bad stats=%s.\n", slot, drive, + be.strerror()); Jmsg(jcr, M_FATAL, 0, _("3992 Bad autochanger \"load slot %d, drive %d\": ERR=%s.\n"), slot, drive, be.strerror()); rtn_stat = -1; /* hard error */ } - Dmsg2(400, "load slot %d status=%d\n", slot, status); + Dmsg2(100, "load slot %d status=%d\n", slot, status); unlock_changer(dcr); } else { status = 0; /* we got what we want */ dev->Slot = slot; /* set currently loaded slot */ } - Dmsg1(400, "After changer, status=%d\n", status); + Dmsg1(100, "After changer, status=%d\n", status); if (status == 0) { /* did we succeed? 
*/ rtn_stat = 1; /* tape loaded by changer */ } @@ -220,9 +223,9 @@ int get_autochanger_loaded_slot(DCR *dcr) drive); changer = edit_device_codes(dcr, changer, dcr->device->changer_command, "loaded"); *results = 0; - Dmsg1(200, "Run program=%s\n", changer); + Dmsg1(100, "Run program=%s\n", changer); status = run_program(changer, timeout, results); - Dmsg3(200, "run_prog: %s stat=%d result=%s\n", changer, status, results); + Dmsg3(100, "run_prog: %s stat=%d result=%s\n", changer, status, results); if (status == 0) { loaded = str_to_int32(results); if (loaded > 0) { @@ -251,7 +254,7 @@ static void lock_changer(DCR *dcr) { AUTOCHANGER *changer_res = dcr->device->changer_res; if (changer_res) { - Dmsg1(100, "Locking changer %s\n", changer_res->hdr.name); + Dmsg1(200, "Locking changer %s\n", changer_res->hdr.name); P(changer_res->changer_mutex); /* Lock changer script */ } } @@ -260,7 +263,7 @@ static void unlock_changer(DCR *dcr) { AUTOCHANGER *changer_res = dcr->device->changer_res; if (changer_res) { - Dmsg1(100, "Unlocking changer %s\n", changer_res->hdr.name); + Dmsg1(200, "Unlocking changer %s\n", changer_res->hdr.name); V(changer_res->changer_mutex); /* Unlock changer script */ } } @@ -303,7 +306,7 @@ bool unload_autochanger(DCR *dcr, int loaded) changer = edit_device_codes(dcr, changer, dcr->device->changer_command, "unload"); dev->close(); - Dmsg1(200, "Run program=%s\n", changer); + Dmsg1(100, "Run program=%s\n", changer); int stat = run_program(changer, timeout, NULL); dcr->VolCatInfo.Slot = slot; if (stat != 0) { @@ -356,7 +359,11 @@ static bool unload_other_drive(DCR *dcr, int slot) } /* The Volume we want is on another device. 
*/ - + if (dev->is_busy()) { + Dmsg4(100, "Vol %s for dev=%s in use dev=%s slot=%d\n", + dcr->VolumeName, dcr->dev->print_name(), + dev->print_name(), slot); + } for (int i=0; i < 3; i++) { if (dev->is_busy()) { wait_for_device(dcr->jcr, first); @@ -369,8 +376,9 @@ static bool unload_other_drive(DCR *dcr, int slot) if (dev->is_busy()) { Jmsg(jcr, M_WARNING, 0, _("Volume \"%s\" is in use by device %s\n"), dcr->VolumeName, dev->print_name()); - Dmsg2(200, "Volume \"%s\" is in use by device %s\n", - dcr->VolumeName, dev->print_name()); + Dmsg4(100, "Vol %s for dev=%s is busy dev=%s slot=%d\n", + dcr->VolumeName, dcr->dev->print_name(), dev->print_name(), slot); + Dmsg2(100, "num_writ=%d reserv=%d\n", dev->num_writers, dev->reserved_device); V(dev->mutex); return false; } @@ -381,7 +389,7 @@ static bool unload_other_drive(DCR *dcr, int slot) _("3307 Issuing autochanger \"unload slot %d, drive %d\" command.\n"), slot, dev->drive_index); - Dmsg2(200, "Issuing autochanger \"unload slot %d, drive %d\" command.\n", + Dmsg2(100, "Issuing autochanger \"unload slot %d, drive %d\" command.\n", slot, dev->drive_index); save_dev = dcr->dev; @@ -393,7 +401,7 @@ static bool unload_other_drive(DCR *dcr, int slot) dev->close(); Dmsg2(200, "close dev=%s reserve=%d\n", dev->print_name(), dev->reserved_device); - Dmsg1(200, "Run program=%s\n", changer_cmd); + Dmsg1(100, "Run program=%s\n", changer_cmd); int stat = run_program(changer_cmd, timeout, NULL); dcr->VolCatInfo.Slot = save_slot; dcr->dev = save_dev; @@ -403,12 +411,12 @@ static bool unload_other_drive(DCR *dcr, int slot) Jmsg(jcr, M_INFO, 0, _("3995 Bad autochanger \"unload slot %d, drive %d\": ERR=%s.\n"), slot, dev->drive_index, be.strerror()); - Dmsg3(200, "Bad autochanger \"unload slot %d, drive %d\": ERR=%s.\n", + Dmsg3(100, "Bad autochanger \"unload slot %d, drive %d\": ERR=%s.\n", slot, dev->drive_index, be.strerror()); ok = false; } else { dev->Slot = 0; /* nothing loaded */ - Dmsg0(200, "Slot unloaded\n"); + Dmsg0(100, 
"Slot unloaded\n"); } unlock_changer(dcr); V(dev->mutex); diff --git a/bacula/src/stored/bls.c b/bacula/src/stored/bls.c index ee3343889f..933af258c2 100644 --- a/bacula/src/stored/bls.c +++ b/bacula/src/stored/bls.c @@ -277,7 +277,7 @@ static void do_blocks(char *infname) free_record(record); Jmsg(jcr, M_INFO, 0, _("Mounted Volume \"%s\".\n"), dcr->VolumeName); } else if (dev->at_eof()) { - Jmsg(jcr, M_INFO, 0, _("Got EOF at file %u on device %s, Volume \"%s\"\n"), + Jmsg(jcr, M_INFO, 0, _("End of file %u on device %s, Volume \"%s\"\n"), dev->file, dev->print_name(), dcr->VolumeName); Dmsg0(20, "read_record got eof. try again\n"); continue; diff --git a/bacula/src/stored/butil.c b/bacula/src/stored/butil.c index 44f190761f..43a8088a8f 100644 --- a/bacula/src/stored/butil.c +++ b/bacula/src/stored/butil.c @@ -12,7 +12,7 @@ * Version $Id$ */ /* - Copyright (C) 2000-2005 Kern Sibbald + Copyright (C) 2000-2006 Kern Sibbald This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License @@ -93,7 +93,7 @@ JCR *setup_jcr(const char *name, char *dev_name, BSR *bsr, pm_strcpy(jcr->fileset_md5, "Dummy.fileset.md5"); init_autochangers(); - init_volume_list(); + create_volume_list(); dcr = setup_to_access_device(jcr, dev_name, VolumeName, mode); if (!dcr) { @@ -121,6 +121,8 @@ static DCR *setup_to_access_device(JCR *jcr, char *dev_name, DCR *dcr; char VolName[MAX_NAME_LENGTH]; + init_reservations_lock(); + /* * If no volume name already given and no bsr, and it is a file, * try getting name from Filename diff --git a/bacula/src/stored/dev.c b/bacula/src/stored/dev.c index a00a6e25ea..8695fa04b8 100644 --- a/bacula/src/stored/dev.c +++ b/bacula/src/stored/dev.c @@ -1503,7 +1503,7 @@ weof_dev(DEVICE *dev, int num) { struct mtop mt_com; int stat; - Dmsg0(29, "weof_dev\n"); + Dmsg0(129, "weof_dev\n"); if (dev->fd < 0) { dev->dev_errno = EBADF; @@ -1672,6 +1672,8 @@ void DEVICE::close() if (fd >= 0) { ::close(fd); } else { 
+ Dmsg2(100, "device %s already closed vol=%s\n", print_name(), + VolHdr.VolumeName); return; /* already closed */ } @@ -1705,8 +1707,8 @@ void DEVICE::close() part_size = 0; part_start = 0; EndFile = EndBlock = 0; - memset(&VolCatInfo, 0, sizeof(VolCatInfo)); free_volume(this); + memset(&VolCatInfo, 0, sizeof(VolCatInfo)); memset(&VolHdr, 0, sizeof(VolHdr)); if (tid) { stop_thread_timer(tid); @@ -1744,7 +1746,7 @@ bool DEVICE::truncate(DCR *dcr) /* We need the DCR for DVD-writing */ */ bool DEVICE::mount(int timeout) { - Dmsg0(90, "Enter mount\n"); + Dmsg0(190, "Enter mount\n"); if (is_mounted()) { return true; } else if (requires_mount()) { diff --git a/bacula/src/stored/dircmd.c b/bacula/src/stored/dircmd.c index a371688fe3..b00f425233 100644 --- a/bacula/src/stored/dircmd.c +++ b/bacula/src/stored/dircmd.c @@ -262,13 +262,13 @@ static bool cancel_cmd(JCR *cjcr) if (!(jcr=get_jcr_by_full_name(Job))) { bnet_fsend(dir, _("3904 Job %s not found.\n"), Job); } else { - P(jcr->mutex); + jcr->lock(); oldStatus = jcr->JobStatus; set_jcr_job_status(jcr, JS_Canceled); if (!jcr->authenticated && oldStatus == JS_WaitFD) { pthread_cond_signal(&jcr->job_start_wait); /* wake waiting thread */ } - V(jcr->mutex); + jcr->unlock(); if (jcr->file_bsock) { bnet_sig(jcr->file_bsock, BNET_TERMINATE); } diff --git a/bacula/src/stored/fd_cmds.c b/bacula/src/stored/fd_cmds.c index aca7e3c0d4..41df7eae30 100644 --- a/bacula/src/stored/fd_cmds.c +++ b/bacula/src/stored/fd_cmds.c @@ -16,19 +16,14 @@ Copyright (C) 2000-2006 Kern Sibbald This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of - the License, or (at your option) any later version. + modify it under the terms of the GNU General Public License + version 2 as amended with additional clauses defined in the + file LICENSE in the main source directory. 
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public - License along with this program; if not, write to the Free - Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, - MA 02111-1307, USA. + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + the file LICENSE for additional details. */ @@ -138,7 +133,7 @@ void run_job(JCR *jcr) for (i=0; fd_cmds[i].cmd; i++) { if (strncmp(fd_cmds[i].cmd, fd->msg, strlen(fd_cmds[i].cmd)) == 0) { found = true; /* indicate command found */ - if (!fd_cmds[i].func(jcr)) { /* do command */ + if (!fd_cmds[i].func(jcr) || job_canceled(jcr)) { /* do command */ set_jcr_job_status(jcr, JS_ErrorTerminated); quit = true; } diff --git a/bacula/src/stored/job.c b/bacula/src/stored/job.c index 4809fc7286..0d188eb593 100644 --- a/bacula/src/stored/job.c +++ b/bacula/src/stored/job.c @@ -24,6 +24,8 @@ #include "bacula.h" #include "stored.h" +static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; + /* Imported variables */ extern uint32_t VolSessionTime; @@ -159,14 +161,14 @@ bool run_cmd(JCR *jcr) * when he does, we will be released, unless the 30 minutes * expires. 
*/ - P(jcr->mutex); + P(mutex); for ( ;!job_canceled(jcr); ) { - errstat = pthread_cond_timedwait(&jcr->job_start_wait, &jcr->mutex, &timeout); + errstat = pthread_cond_timedwait(&jcr->job_start_wait, &mutex, &timeout); if (errstat == 0 || errstat == ETIMEDOUT) { break; } } - V(jcr->mutex); + V(mutex); memset(jcr->sd_auth_key, 0, strlen(jcr->sd_auth_key)); @@ -215,12 +217,10 @@ void handle_filed_connection(BSOCK *fd, char *job_name) Dmsg1(110, "OK Authentication Job %s\n", jcr->Job); } - P(jcr->mutex); if (!jcr->authenticated) { set_jcr_job_status(jcr, JS_ErrorTerminated); } pthread_cond_signal(&jcr->job_start_wait); /* wake waiting job */ - V(jcr->mutex); free_jcr(jcr); return; } diff --git a/bacula/src/stored/mac.c b/bacula/src/stored/mac.c index aedbf100b4..6eaec57f96 100644 --- a/bacula/src/stored/mac.c +++ b/bacula/src/stored/mac.c @@ -62,55 +62,63 @@ bool do_mac(JCR *jcr) Dmsg0(20, "Start read data.\n"); + if (!jcr->read_dcr || !jcr->dcr) { + Jmsg(jcr, M_FATAL, 0, _("Read and write devices not properly initialized.\n")); + goto bail_out; + } + Dmsg2(000, "read_dcr=%p write_dcr=%p\n", jcr->read_dcr, jcr->dcr); + create_restore_volume_list(jcr); if (jcr->NumVolumes == 0) { Jmsg(jcr, M_FATAL, 0, _("No Volume names found for %s.\n"), Type); - free_restore_volume_list(jcr); - return false; + goto bail_out; } Dmsg3(200, "Found %d volumes names for %s. 
First=%s\n", jcr->NumVolumes, jcr->VolList->VolumeName, Type); - /* Ready device for reading */ - if (!acquire_device_for_read(jcr->read_dcr)) { - ok = false; - goto bail_out; - } - - if (!acquire_device_for_append(jcr->dcr)) { + /* Ready devices for reading and writing */ + if (!acquire_device_for_read(jcr->read_dcr) || + !acquire_device_for_append(jcr->dcr)) { set_jcr_job_status(jcr, JS_ErrorTerminated); - ok = false; goto bail_out; } + jcr->dcr->VolFirstIndex = jcr->dcr->VolLastIndex = 0; jcr->run_time = time(NULL); ok = read_records(jcr->read_dcr, record_cb, mount_next_read_volume); + goto ok_out; bail_out: - - dev = jcr->dcr->dev; - if (ok || dev->can_write()) { - /* Flush out final partial block of this session */ - if (!write_block_to_device(jcr->dcr)) { - Jmsg2(jcr, M_FATAL, 0, _("Fatal append error on device %s: ERR=%s\n"), - dev->print_name(), dev->bstrerror()); - Dmsg0(100, _("Set ok=FALSE after write_block_to_device.\n")); - ok = false; - } - } + ok = false; + +ok_out: + if (jcr->dcr) { + dev = jcr->dcr->dev; + if (ok || dev->can_write()) { + /* Flush out final partial block of this session */ + if (!write_block_to_device(jcr->dcr)) { + Jmsg2(jcr, M_FATAL, 0, _("Fatal append error on device %s: ERR=%s\n"), + dev->print_name(), dev->bstrerror()); + Dmsg0(100, _("Set ok=FALSE after write_block_to_device.\n")); + ok = false; + } + } - if (ok && dev->is_dvd()) { - ok = dvd_close_job(jcr->dcr); /* do DVD cleanup if any */ + if (ok && dev->is_dvd()) { + ok = dvd_close_job(jcr->dcr); /* do DVD cleanup if any */ + } + /* Release the device -- and send final Vol info to DIR */ + release_device(jcr->dcr); } - /* Release the device -- and send final Vol info to DIR */ - release_device(jcr->dcr); - if (!release_device(jcr->read_dcr)) { - ok = false; + if (jcr->read_dcr) { + if (!release_device(jcr->read_dcr)) { + ok = false; + } } free_restore_volume_list(jcr); diff --git a/bacula/src/stored/protos.h b/bacula/src/stored/protos.h index 7b7da1390e..42b24ba050 100644 
--- a/bacula/src/stored/protos.h +++ b/bacula/src/stored/protos.h @@ -208,12 +208,16 @@ bool read_records(DCR *dcr, bool mount_cb(DCR *dcr)); /* From reserve.c */ +void init_reservations_lock(); +void term_reservations_lock(); +void lock_reservations(); +void unlock_reservations(); void release_volume(DCR *dcr); VOLRES *new_volume(DCR *dcr, const char *VolumeName); VOLRES *find_volume(const char *VolumeName); bool free_volume(DEVICE *dev); void free_unused_volume(DCR *dcr); -void init_volume_list(); +void create_volume_list(); void free_volume_list(); void list_volumes(BSOCK *user); bool is_volume_in_use(DCR *dcr); diff --git a/bacula/src/stored/read_record.c b/bacula/src/stored/read_record.c index f450574132..36064d06fb 100644 --- a/bacula/src/stored/read_record.c +++ b/bacula/src/stored/read_record.c @@ -107,10 +107,10 @@ bool read_records(DCR *dcr, } else if (dev->at_eof()) { if (verbose) { - Jmsg(jcr, M_INFO, 0, _("Got EOF at file %u on device %s, Volume \"%s\"\n"), + Jmsg(jcr, M_INFO, 0, _("End of file %u on device %s, Volume \"%s\"\n"), dev->file, dev->print_name(), dcr->VolumeName); } - Dmsg3(200, "Got EOF at file %u on device %s, Volume \"%s\"\n", + Dmsg3(200, "End of file %u on device %s, Volume \"%s\"\n", dev->file, dev->print_name(), dcr->VolumeName); continue; } else if (dev->is_short_block()) { diff --git a/bacula/src/stored/reserve.c b/bacula/src/stored/reserve.c index dd38158e44..41c5797d4b 100644 --- a/bacula/src/stored/reserve.c +++ b/bacula/src/stored/reserve.c @@ -26,10 +26,8 @@ #include "bacula.h" #include "stored.h" - static dlist *vol_list = NULL; static pthread_mutex_t vol_list_lock = PTHREAD_MUTEX_INITIALIZER; -static pthread_mutex_t search_lock = PTHREAD_MUTEX_INITIALIZER; /* Forward referenced functions */ static int can_reserve_drive(DCR *dcr, RCTX &rctx); @@ -67,6 +65,45 @@ static int my_compare(void *item1, void *item2) return strcmp(((VOLRES *)item1)->vol_name, ((VOLRES *)item2)->vol_name); } +static brwlock_t reservation_lock; + +void 
init_reservations_lock() +{ + int errstat; + if ((errstat=rwl_init(&reservation_lock)) != 0) { + berrno be; + Emsg1(M_ABORT, 0, _("Unable to initialize reservation lock. ERR=%s\n"), + be.strerror(errstat)); + } + +} + +void term_reservations_lock() +{ + rwl_destroy(&reservation_lock); +} + +/* This applies to a drive and to Volumes */ +void lock_reservations() +{ + int errstat; + if ((errstat=rwl_writelock(&reservation_lock)) != 0) { + berrno be; + Emsg2(M_ABORT, 0, "rwl_writelock failure. stat=%d: ERR=%s\n", + errstat, be.strerror(errstat)); + } +} + +void unlock_reservations() +{ + int errstat; + if ((errstat=rwl_writeunlock(&reservation_lock)) != 0) { + berrno be; + Emsg2(M_ABORT, 0, "rwl_writeunlock failure. stat=%d: ERR=%s\n", + errstat, be.strerror(errstat)); + } +} + /* * Put a new Volume entry in the Volume list. This @@ -81,6 +118,12 @@ VOLRES *new_volume(DCR *dcr, const char *VolumeName) VOLRES *vol, *nvol; Dmsg1(400, "new_volume %s\n", VolumeName); + /* + * We lock the reservations system here to ensure + * when adding a new volume that no newly scheduled + * job can reserve it. + */ + lock_reservations(); P(vol_list_lock); if (dcr->dev) { again: @@ -100,16 +143,27 @@ again: vol->vol_name = bstrdup(VolumeName); vol->dev = dcr->dev; vol->dcr = dcr; + Dmsg2(100, "New Vol=%s dev=%s\n", VolumeName, dcr->dev->print_name()); nvol = (VOLRES *)vol_list->binary_insert(vol, my_compare); if (nvol != vol) { free(vol->vol_name); free(vol); vol = NULL; if (dcr->dev) { - nvol->dev = dcr->dev; + DEVICE *dev = nvol->dev; + if (!dev->is_busy()) { + Dmsg3(100, "Swap vol=%s from dev=%s to %s\n", VolumeName, + dev->print_name(), dcr->dev->print_name()); + nvol->dev = dcr->dev; + dev->VolHdr.VolumeName[0] = 0; + } else { + Dmsg3(100, "!!!! 
could not swap vol=%s from dev=%s to %s\n", VolumeName, + dev->print_name(), dcr->dev->print_name()); + } } } V(vol_list_lock); + unlock_reservations(); return vol; } @@ -122,6 +176,7 @@ again: VOLRES *find_volume(const char *VolumeName) { VOLRES vol, *fvol; + /* Do not lock reservations here */ P(vol_list_lock); vol.vol_name = bstrdup(VolumeName); fvol = (VOLRES *)vol_list->binary_search(&vol, my_compare); @@ -140,8 +195,9 @@ bool free_volume(DEVICE *dev) { VOLRES vol, *fvol; - P(vol_list_lock); + P(vol_list_lock); if (dev->VolHdr.VolumeName[0] == 0) { + Dmsg1(100, "free_volume: no vol on dev %s\n", dev->print_name()); /* * Our device has no VolumeName listed, but * search the list for any Volume attached to @@ -151,6 +207,7 @@ bool free_volume(DEVICE *dev) if (fvol && fvol->dev == dev) { vol_list->remove(fvol); if (fvol->vol_name) { + Dmsg2(100, "free_volume %s dev=%s\n", fvol->vol_name, dev->print_name()); free(fvol->vol_name); } free(fvol); @@ -164,6 +221,7 @@ bool free_volume(DEVICE *dev) fvol = (VOLRES *)vol_list->binary_search(&vol, my_compare); if (fvol) { vol_list->remove(fvol); + Dmsg2(100, "free_volume %s dev=%s\n", fvol->vol_name, dev->print_name()); free(fvol->vol_name); free(fvol); } @@ -184,6 +242,7 @@ void free_unused_volume(DCR *dcr) if (vol->dcr == dcr && (vol->dev == NULL || strcmp(vol->vol_name, vol->dev->VolHdr.VolumeName) != 0)) { vol_list->remove(vol); + Dmsg1(100, "free_unused_olume %s\n", vol->vol_name); free(vol->vol_name); free(vol); break; @@ -198,6 +257,7 @@ void free_unused_volume(DCR *dcr) void list_volumes(BSOCK *user) { VOLRES *vol; + P(vol_list_lock); for (vol=(VOLRES *)vol_list->first(); vol; vol=(VOLRES *)vol_list->next(vol)) { if (vol->dev) { bnet_fsend(user, "%s on device %s\n", vol->vol_name, vol->dev->print_name()); @@ -205,10 +265,11 @@ void list_volumes(BSOCK *user) bnet_fsend(user, "%s\n", vol->vol_name); } } + V(vol_list_lock); } /* Create the Volume list */ -void init_volume_list() +void create_volume_list() { VOLRES 
*dummy = NULL; if (vol_list == NULL) { @@ -223,29 +284,36 @@ void free_volume_list() if (!vol_list) { return; } + P(vol_list_lock); for (vol=(VOLRES *)vol_list->first(); vol; vol=(VOLRES *)vol_list->next(vol)) { Dmsg3(000, "Unreleased Volume=%s dcr=0x%x dev=0x%x\n", vol->vol_name, vol->dcr, vol->dev); } delete vol_list; vol_list = NULL; + V(vol_list_lock); } bool is_volume_in_use(DCR *dcr) { VOLRES *vol = find_volume(dcr->VolumeName); if (!vol) { + Dmsg1(100, "Vol=%s not in use.\n", dcr->VolumeName); return false; /* vol not in list */ } if (!vol->dev) { /* vol not attached to device */ + Dmsg1(100, "Vol=%s has no dev.\n", dcr->VolumeName); return false; } if (dcr->dev == vol->dev) { /* same device OK */ + Dmsg1(100, "Vol=%s on same dev.\n", dcr->VolumeName); return false; } if (!vol->dev->is_busy()) { + Dmsg2(100, "Vol=%s dev=%s not busy.\n", dcr->VolumeName, vol->dev->print_name()); return false; } + Dmsg2(100, "Vol=%s used by %s.\n", dcr->VolumeName, vol->dev->print_name()); return true; } @@ -320,11 +388,11 @@ static bool use_storage_cmd(JCR *jcr) /* ***FIXME**** remove after 1.38 release */ char *device_name; foreach_alist(store, jcr->dirstore) { - Dmsg5(100, "Storage=%s media_type=%s pool=%s pool_type=%s append=%d\n", + Dmsg5(110, "Storage=%s media_type=%s pool=%s pool_type=%s append=%d\n", store->name, store->media_type, store->pool_name, store->pool_type, store->append); foreach_alist(device_name, store->device) { - Dmsg1(100, " Device=%s\n", device_name); + Dmsg1(110, " Device=%s\n", device_name); } } #endif @@ -343,7 +411,7 @@ static bool use_storage_cmd(JCR *jcr) bool first = true; /* print wait message once */ rctx.notify_dir = true; for ( ; !job_canceled(jcr); ) { - P(search_lock); /* only one thread at a time */ + lock_reservations(); /* only one thread at a time */ while ((msg = (char *)msgs->pop())) { free(msg); } @@ -357,7 +425,7 @@ static bool use_storage_cmd(JCR *jcr) rctx.PreferMountedVols = false; rctx.exact_match = false; rctx.autochanger_only 
= true; - Dmsg5(100, "PrefMnt=%d exact=%d suitable=%d chgronly=%d any=%d\n", + Dmsg5(110, "PrefMnt=%d exact=%d suitable=%d chgronly=%d any=%d\n", rctx.PreferMountedVols, rctx.exact_match, rctx.suitable_device, rctx.autochanger_only, rctx.any_drive); if ((ok = find_suitable_device_for_job(jcr, rctx))) { @@ -372,7 +440,7 @@ static bool use_storage_cmd(JCR *jcr) rctx.try_low_use_drive = false; } rctx.autochanger_only = false; - Dmsg5(100, "PrefMnt=%d exact=%d suitable=%d chgronly=%d any=%d\n", + Dmsg5(110, "PrefMnt=%d exact=%d suitable=%d chgronly=%d any=%d\n", rctx.PreferMountedVols, rctx.exact_match, rctx.suitable_device, rctx.autochanger_only, rctx.any_drive); if ((ok = find_suitable_device_for_job(jcr, rctx))) { @@ -383,7 +451,7 @@ static bool use_storage_cmd(JCR *jcr) rctx.PreferMountedVols = true; rctx.exact_match = true; rctx.autochanger_only = false; - Dmsg5(100, "PrefMnt=%d exact=%d suitable=%d chgronly=%d any=%d\n", + Dmsg5(110, "PrefMnt=%d exact=%d suitable=%d chgronly=%d any=%d\n", rctx.PreferMountedVols, rctx.exact_match, rctx.suitable_device, rctx.autochanger_only, rctx.any_drive); if ((ok = find_suitable_device_for_job(jcr, rctx))) { @@ -391,7 +459,7 @@ static bool use_storage_cmd(JCR *jcr) } /* Look for any mounted drive */ rctx.exact_match = false; - Dmsg5(100, "PrefMnt=%d exact=%d suitable=%d chgronly=%d any=%d\n", + Dmsg5(110, "PrefMnt=%d exact=%d suitable=%d chgronly=%d any=%d\n", rctx.PreferMountedVols, rctx.exact_match, rctx.suitable_device, rctx.autochanger_only, rctx.any_drive); if ((ok = find_suitable_device_for_job(jcr, rctx))) { @@ -399,15 +467,16 @@ static bool use_storage_cmd(JCR *jcr) } /* Try any drive */ rctx.any_drive = true; - Dmsg5(100, "PrefMnt=%d exact=%d suitable=%d chgronly=%d any=%d\n", + Dmsg5(110, "PrefMnt=%d exact=%d suitable=%d chgronly=%d any=%d\n", rctx.PreferMountedVols, rctx.exact_match, rctx.suitable_device, rctx.autochanger_only, rctx.any_drive); if ((ok = find_suitable_device_for_job(jcr, rctx))) { break; } /* Unlock 
before possible wait */ - V(search_lock); + unlock_reservations(); if (!rctx.suitable_device || !wait_for_device(jcr, first)) { + Dmsg0(100, "Fail. !suitable_device || !wait_for_device\n"); break; /* Get out, failure ... */ } first = false; @@ -415,7 +484,7 @@ static bool use_storage_cmd(JCR *jcr) } /* Note if !ok then search_lock is already cleared */ if (ok) { - V(search_lock); + unlock_reservations(); goto all_done; } @@ -458,13 +527,13 @@ void release_msgs(JCR *jcr) if (!msgs) { return; } - P(search_lock); + lock_reservations(); while ((msg = (char *)msgs->pop())) { free(msg); } delete msgs; jcr->reserve_msgs = NULL; - V(search_lock); + unlock_reservations(); } /* @@ -480,7 +549,7 @@ bool find_suitable_device_for_job(JCR *jcr, RCTX &rctx) * For each storage device that the user specified, we * search and see if there is a resource for that device. */ - Dmsg4(100, "PrefMnt=%d exact=%d suitable=%d chgronly=%d\n", + Dmsg4(110, "PrefMnt=%d exact=%d suitable=%d chgronly=%d\n", rctx.PreferMountedVols, rctx.exact_match, rctx.suitable_device, rctx.autochanger_only); ok = false; @@ -495,10 +564,10 @@ bool find_suitable_device_for_job(JCR *jcr, RCTX &rctx) ok = true; break; } else if (stat == 0) { /* device busy */ - Dmsg1(100, "Suitable device found=%s, not used: busy\n", device_name); + Dmsg1(110, "Suitable device found=%s, not used: busy\n", device_name); } else { /* otherwise error */ - Dmsg0(100, "No suitable device found.\n"); + Dmsg0(110, "No suitable device found.\n"); } } if (ok) { @@ -520,7 +589,7 @@ int search_res_for_device(RCTX &rctx) bool ok; int stat; - Dmsg1(100, "Search res for %s\n", rctx.device_name); + Dmsg1(110, "Search res for %s\n", rctx.device_name); /* Look through Autochangers first */ foreach_res(changer, R_AUTOCHANGER) { Dmsg1(150, "Try match changer res=%s\n", changer->hdr.name); @@ -528,7 +597,7 @@ int search_res_for_device(RCTX &rctx) if (fnmatch(rctx.device_name, changer->hdr.name, 0) == 0) { /* Try each device in this AutoChanger */ 
foreach_alist(rctx.device, changer->device) { - Dmsg1(100, "Try changer device %s\n", rctx.device->hdr.name); + Dmsg1(110, "Try changer device %s\n", rctx.device->hdr.name); stat = reserve_device(rctx); if (stat != 1) { /* try another device */ continue; @@ -592,7 +661,7 @@ static int reserve_device(RCTX &rctx) const int name_len = MAX_NAME_LENGTH; /* Make sure MediaType is OK */ - Dmsg2(100, "MediaType device=%s request=%s\n", + Dmsg2(110, "MediaType device=%s request=%s\n", rctx.device->media_type, rctx.store->media_type); if (strcmp(rctx.device->media_type, rctx.store->media_type) != 0) { return -1; @@ -616,7 +685,7 @@ static int reserve_device(RCTX &rctx) } rctx.suitable_device = true; - Dmsg2(100, "Try reserve %s JobId=%u\n", rctx.device->hdr.name, + Dmsg2(110, "Try reserve %s JobId=%u\n", rctx.device->hdr.name, rctx.jcr->JobId); dcr = new_dcr(rctx.jcr, rctx.device->dev); if (!dcr) { @@ -659,7 +728,7 @@ static int reserve_device(RCTX &rctx) } if (!ok) { free_dcr(dcr); - Dmsg0(100, "Not OK.\n"); + Dmsg0(110, "Not OK.\n"); return 0; } return 1; @@ -741,7 +810,7 @@ static bool reserve_device_for_append(DCR *dcr, RCTX &rctx) if (dev->can_read()) { Mmsg(jcr->errmsg, _("3603 JobId=%u device %s is busy reading.\n"), jcr->JobId, dev->print_name()); - Dmsg1(100, "%s", jcr->errmsg); + Dmsg1(110, "%s", jcr->errmsg); queue_reserve_message(jcr); goto bail_out; } @@ -750,16 +819,16 @@ static bool reserve_device_for_append(DCR *dcr, RCTX &rctx) if (is_device_unmounted(dev)) { Mmsg(jcr->errmsg, _("3604 JobId=%u device %s is BLOCKED due to user unmount.\n"), jcr->JobId, dev->print_name()); - Dmsg1(100, "%s", jcr->errmsg); + Dmsg1(110, "%s", jcr->errmsg); queue_reserve_message(jcr); goto bail_out; } - Dmsg1(100, "reserve_append device is %s\n", dev->is_tape()?"tape":"disk"); + Dmsg1(110, "reserve_append device is %s\n", dev->is_tape()?"tape":"disk"); /* Now do detailed tests ... 
*/ if (can_reserve_drive(dcr, rctx) != 1) { - Dmsg0(100, "can_reserve_drive!=1\n"); + Dmsg0(110, "can_reserve_drive!=1\n"); goto bail_out; } @@ -784,7 +853,7 @@ static int can_reserve_drive(DCR *dcr, RCTX &rctx) DEVICE *dev = dcr->dev; JCR *jcr = dcr->jcr; - Dmsg5(100, "PrefMnt=%d exact=%d suitable=%d chgronly=%d any=%d\n", + Dmsg5(110, "PrefMnt=%d exact=%d suitable=%d chgronly=%d any=%d\n", rctx.PreferMountedVols, rctx.exact_match, rctx.suitable_device, rctx.autochanger_only, rctx.any_drive); @@ -797,7 +866,7 @@ static int can_reserve_drive(DCR *dcr, RCTX &rctx) * helps spread the load to the least used drives. */ if (rctx.try_low_use_drive && dev == rctx.low_use_drive) { - Dmsg3(100, "OK dev=%s == low_drive=%s. JobId=%u\n", + Dmsg3(110, "OK dev=%s == low_drive=%s. JobId=%u\n", dev->print_name(), rctx.low_use_drive->print_name(), jcr->JobId); return 1; } @@ -807,13 +876,13 @@ static int can_reserve_drive(DCR *dcr, RCTX &rctx) if ((dev->num_writers + dev->reserved_device) < rctx.num_writers) { rctx.num_writers = dev->num_writers + dev->reserved_device; rctx.low_use_drive = dev; - Dmsg2(100, "set low use drive=%s num_writers=%d\n", dev->print_name(), + Dmsg2(110, "set low use drive=%s num_writers=%d\n", dev->print_name(), rctx.num_writers); } else { - Dmsg1(100, "not low use num_writers=%d\n", dev->num_writers+ + Dmsg1(110, "not low use num_writers=%d\n", dev->num_writers+ dev->reserved_device); } - Dmsg1(100, "failed: !prefMnt && busy. JobId=%u\n", jcr->JobId); + Dmsg1(110, "failed: !prefMnt && busy. 
JobId=%u\n", jcr->JobId); Mmsg(jcr->errmsg, _("3605 JobId=%u wants free drive but device %s is busy.\n"), jcr->JobId, dev->print_name()); queue_reserve_message(jcr); @@ -825,7 +894,7 @@ static int can_reserve_drive(DCR *dcr, RCTX &rctx) Mmsg(jcr->errmsg, _("3606 JobId=%u wants mounted, but drive %s has no Volume.\n"), jcr->JobId, dev->print_name()); queue_reserve_message(jcr); - Dmsg1(100, "failed: want mounted -- no vol JobId=%u\n", jcr->JobId); + Dmsg1(110, "failed: want mounted -- no vol JobId=%u\n", jcr->JobId); return 0; /* No volume mounted */ } @@ -836,7 +905,7 @@ static int can_reserve_drive(DCR *dcr, RCTX &rctx) jcr->JobId, rctx.VolumeName, dev->VolHdr.VolumeName, dev->print_name()); queue_reserve_message(jcr); - Dmsg2(100, "failed: Not exact match have=%s want=%s\n", + Dmsg2(110, "failed: Not exact match have=%s want=%s\n", dev->VolHdr.VolumeName, rctx.VolumeName); return 0; } @@ -871,7 +940,7 @@ static int can_reserve_drive(DCR *dcr, RCTX &rctx) Mmsg(jcr->errmsg, _("3608 JobId=%u wants Pool=\"%s\" but have Pool=\"%s\" on drive %s.\n"), jcr->JobId, dcr->pool_name, dev->pool_name, dev->print_name()); queue_reserve_message(jcr); - Dmsg2(100, "failed: busy num_writers=0, reserved, pool=%s wanted=%s\n", + Dmsg2(110, "failed: busy num_writers=0, reserved, pool=%s wanted=%s\n", dev->pool_name, dcr->pool_name); return 0; /* wait */ } @@ -914,7 +983,7 @@ static int can_reserve_drive(DCR *dcr, RCTX &rctx) Mmsg(jcr->errmsg, _("3609 JobId=%u wants Pool=\"%s\" but have Pool=\"%s\" on drive %s.\n"), jcr->JobId, dcr->pool_name, dev->pool_name, dev->print_name()); queue_reserve_message(jcr); - Dmsg2(100, "failed: busy num_writers>0, can_append, pool=%s wanted=%s\n", + Dmsg2(110, "failed: busy num_writers>0, can_append, pool=%s wanted=%s\n", dev->pool_name, dcr->pool_name); return 0; /* wait */ } @@ -929,7 +998,7 @@ static int can_reserve_drive(DCR *dcr, RCTX &rctx) Mmsg(jcr->errmsg, _("3911 JobId=%u failed reserve drive %s.\n"), jcr->JobId, dev->print_name()); 
queue_reserve_message(jcr); - Dmsg2(100, "failed: No reserve %s JobId=%u\n", dev->print_name(), jcr->JobId); + Dmsg2(110, "failed: No reserve %s JobId=%u\n", dev->print_name(), jcr->JobId); return 0; } @@ -972,10 +1041,10 @@ void send_drive_reserve_messages(JCR *jcr, BSOCK *user) alist *msgs; char *msg; - P(search_lock); + lock_reservations(); msgs = jcr->reserve_msgs; if (!msgs || msgs->size() == 0) { - V(search_lock); + unlock_reservations(); return; } for (i=msgs->size()-1; i >= 0; i--) { @@ -986,5 +1055,5 @@ void send_drive_reserve_messages(JCR *jcr, BSOCK *user) break; } } - V(search_lock); + unlock_reservations(); } diff --git a/bacula/src/stored/stored.c b/bacula/src/stored/stored.c index 71d180a6bf..0889a761f7 100644 --- a/bacula/src/stored/stored.c +++ b/bacula/src/stored/stored.c @@ -54,7 +54,7 @@ char *configfile = NULL; bool init_done = false; /* Global static variables */ -static int foreground = 0; +static bool foreground = 0; static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; static workq_t dird_workq; /* queue for processing connections */ @@ -87,8 +87,8 @@ static void usage() int main (int argc, char *argv[]) { int ch; - int no_signals = FALSE; - int test_config = FALSE; + bool no_signals = false; + bool test_config = false; pthread_t thid; char *uid = NULL; char *gid = NULL; @@ -128,7 +128,7 @@ int main (int argc, char *argv[]) break; case 'f': /* run in foreground */ - foreground = TRUE; + foreground = true; break; case 'g': /* set group id */ @@ -140,11 +140,11 @@ int main (int argc, char *argv[]) break; case 's': /* no signals */ - no_signals = TRUE; + no_signals = true; break; case 't': - test_config = TRUE; + test_config = true; break; case 'u': /* set uid */ @@ -193,6 +193,8 @@ int main (int argc, char *argv[]) Jmsg((JCR *)NULL, M_ERROR_TERM, 0, _("Please correct configuration file: %s\n"), configfile); } + init_reservations_lock(); + if (test_config) { terminate_stored(0); } @@ -228,7 +230,7 @@ int main (int argc, char *argv[]) /* * 
Start the device allocation thread */ - init_volume_list(); /* do before device_init */ + create_volume_list(); /* do before device_init */ if (pthread_create(&thid, NULL, device_initialization, NULL) != 0) { Emsg1(M_ABORT, 0, _("Unable to create thread. ERR=%s\n"), strerror(errno)); } @@ -564,6 +566,7 @@ void terminate_stored(int sig) if (debug_level > 10) { print_memory_pool_stats(); } + term_reservations_lock(); term_msg(); stop_watchdog(); cleanup_crypto(); diff --git a/bacula/src/stored/stored_conf.h b/bacula/src/stored/stored_conf.h index 93e37f3117..e1db10d0bc 100644 --- a/bacula/src/stored/stored_conf.h +++ b/bacula/src/stored/stored_conf.h @@ -114,7 +114,7 @@ public: uint32_t cap_bits; /* Capabilities of this device */ utime_t max_changer_wait; /* Changer timeout */ utime_t max_rewind_wait; /* maximum secs to wait for rewind */ - uint32_t max_open_wait; /* maximum secs to wait for open */ + utime_t max_open_wait; /* maximum secs to wait for open */ uint32_t max_open_vols; /* maximum simultaneous open volumes */ uint32_t min_block_size; /* min block size */ uint32_t max_block_size; /* max block size */ diff --git a/bacula/src/version.h b/bacula/src/version.h index 3f38819a6b..e65726357c 100644 --- a/bacula/src/version.h +++ b/bacula/src/version.h @@ -4,8 +4,8 @@ #undef VERSION #define VERSION "1.39.6" -#define BDATE "08 March 2006" -#define LSMDATE "08Mar06" +#define BDATE "14 March 2006" +#define LSMDATE "14Mar06" /* Debug flags */ #undef DEBUG -- 2.39.5