/*
- Bacula® - The Network Backup Solution
-
- Copyright (C) 2000-2010 Free Software Foundation Europe e.V.
-
- The main author of Bacula is Kern Sibbald, with contributions from
- many others, a complete list can be found in the file AUTHORS.
- This program is Free Software; you can redistribute it and/or
- modify it under the terms of version two of the GNU General Public
- License as published by the Free Software Foundation and included
- in the file LICENSE.
-
- This program is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- 02110-1301, USA.
-
- Bacula® is a registered trademark of Kern Sibbald.
- The licensor of Bacula is the Free Software Foundation Europe
- (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
- Switzerland, email:ftf@fsfeurope.org.
+ Bacula(R) - The Network Backup Solution
+
+ Copyright (C) 2000-2017 Kern Sibbald
+
+ The original author of Bacula is Kern Sibbald, with contributions
+ from many others, a complete list can be found in the file AUTHORS.
+
+ You may use this file and others of this release according to the
+ license defined in the LICENSE file, which includes the Affero General
+ Public License, v3.0 ("AGPLv3") and some additional permissions and
+ terms pursuant to its AGPLv3 Section 7.
+
+ This notice must be preserved when any source code is
+ conveyed and/or propagated.
+
+ Bacula(R) is a registered trademark of Kern Sibbald.
*/
/*
* Manipulation routines for Job Control Records and
* The result is that there is one lock/unlock for each entry
* in the list while traversing it rather than a single lock
* at the beginning of a traversal and one at the end. This
- * incurs slightly more overhead, but effectively eliminates
+ * incurs slightly more overhead, but effectively eliminates
* the possibility of race conditions. In addition, with the
* exception of the global locking of the list during the
* re-reading of the config file, no recursion is needed.
int num_jobs_run;
dlist *last_jobs = NULL;
const int max_last_jobs = 10;
-
+
static dlist *jcrs = NULL; /* JCR chain */
static pthread_mutex_t jcr_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_key_t jcr_key; /* Pointer to jcr for each thread */
-pthread_once_t key_once = PTHREAD_ONCE_INIT;
+pthread_once_t key_once = PTHREAD_ONCE_INIT;
+
+static char Job_status[] = "Status JobId=%ld JobStatus=%d\n";
void lock_jobs()
jcr->job_end_push.append(ctx);
}
+/* DELETE ME when bugs in MA1512, MA1632 MA1639 are fixed */
+void (*MA1512_reload_job_end_cb)(JCR *,void *) = NULL;
+
/* Pop each job_end subroutine and call it */
static void job_end_pop(JCR *jcr)
{
for (int i=jcr->job_end_push.size()-1; i > 0; ) {
ctx = jcr->job_end_push.get(i--);
job_end_cb = (void (*)(JCR *,void *))jcr->job_end_push.get(i--);
- job_end_cb(jcr, ctx);
+ /* check for bug MA1512, MA1632 MA1639,
+ * today, job_end_cb can only be reload_job_end_cb() from DIR */
+ if (job_end_cb != MA1512_reload_job_end_cb && MA1512_reload_job_end_cb != NULL) {
+ Tmsg2(0, "Bug 'job_end_pop' detected, skip ! job_end_cb=0x%p ctx=0x%p\n", job_end_cb, ctx);
+ Tmsg0(0, "Display job_end_push list\n");
+ for (int j=jcr->job_end_push.size()-1; j > 0; ) {
+ void *ctx2 = jcr->job_end_push.get(j--);
+ void *job_end_cb2 = jcr->job_end_push.get(j--);
+ Tmsg3(0, "Bug 'job_end_pop' entry[%d] job_end_cb=0x%p ctx=0x%p\n", j+1, job_end_cb2, ctx2);
+ }
+ } else
+ {
+ job_end_cb(jcr, ctx);
+ }
}
}
{
JCR *jcr;
MQUEUE_ITEM *item = NULL;
- struct sigaction sigtimer;
int status;
Dmsg0(dbglvl, "Enter new_jcr\n");
Jmsg1(NULL, M_ABORT, 0, _("pthread_once failed. ERR=%s\n"), be.bstrerror(status));
}
jcr = (JCR *)malloc(size);
- memset(jcr, 0, size);
+ bmemzero(jcr, size);
+ /* Note for the director, this value is changed in jobq.c */
+ jcr->my_thread_id = pthread_self();
jcr->msg_queue = New(dlist(item, &item->link));
if ((status = pthread_mutex_init(&jcr->msg_queue_mutex, NULL)) != 0) {
berrno be;
}
jcr->job_end_push.init(1, false);
jcr->sched_time = time(NULL);
+ jcr->initial_sched_time = jcr->sched_time;
jcr->daemon_free_jcr = daemon_free_jcr; /* plug daemon free routine */
jcr->init_mutex();
- jcr->inc_use_count();
+ jcr->inc_use_count();
jcr->VolumeName = get_pool_memory(PM_FNAME);
jcr->VolumeName[0] = 0;
jcr->errmsg = get_pool_memory(PM_MESSAGE);
jcr->errmsg[0] = 0;
jcr->comment = get_pool_memory(PM_FNAME);
jcr->comment[0] = 0;
+ jcr->StatusErrMsg = get_pool_memory(PM_FNAME);
+ jcr->StatusErrMsg[0] = 0;
+ jcr->job_uid = -1;
/* Setup some dummy values */
bstrncpy(jcr->Job, "*System*", sizeof(jcr->Job));
jcr->JobId = 0;
jcr->setJobType(JT_SYSTEM); /* internal job until defined */
jcr->setJobLevel(L_NONE);
jcr->setJobStatus(JS_Created); /* ready to run */
- set_jcr_in_tsd(jcr);
+#ifndef HAVE_WIN32
+ struct sigaction sigtimer;
sigtimer.sa_flags = 0;
sigtimer.sa_handler = timeout_handler;
sigfillset(&sigtimer.sa_mask);
sigaction(TIMEOUT_SIGNAL, &sigtimer, NULL);
+#endif
/*
* Locking jobs is a global lock that is needed
{
/* Uses jcr lock/unlock */
remove_jcr_from_tsd(jcr);
+ jcr->set_killable(false);
jcr->destroy_mutex();
jcr->msg_queue = NULL;
pthread_mutex_destroy(&jcr->msg_queue_mutex);
}
- close_msg(jcr); /* close messages for this job */
/* do this after closing messages */
- if (jcr->client_name) {
- free_pool_memory(jcr->client_name);
- jcr->client_name = NULL;
- }
-
- if (jcr->attr) {
- free_pool_memory(jcr->attr);
- jcr->attr = NULL;
- }
+ free_and_null_pool_memory(jcr->JobIds);
+ free_and_null_pool_memory(jcr->client_name);
+ free_and_null_pool_memory(jcr->attr);
+ free_and_null_pool_memory(jcr->VolumeName);
+ free_and_null_pool_memory(jcr->errmsg);
+ free_and_null_pool_memory(jcr->StatusErrMsg);
if (jcr->sd_auth_key) {
free(jcr->sd_auth_key);
jcr->sd_auth_key = NULL;
}
- if (jcr->VolumeName) {
- free_pool_memory(jcr->VolumeName);
- jcr->VolumeName = NULL;
- }
- if (jcr->dir_bsock) {
- bnet_close(jcr->dir_bsock);
- jcr->dir_bsock = NULL;
- }
- if (jcr->errmsg) {
- free_pool_memory(jcr->errmsg);
- jcr->errmsg = NULL;
- }
+ free_bsock(jcr->dir_bsock);
+
if (jcr->where) {
free(jcr->where);
jcr->where = NULL;
{
struct s_last_job *je;
- Dmsg3(dbglvl, "Enter free_jcr jid=%u use_count=%d Job=%s\n",
+ Dmsg3(dbglvl, "Enter free_jcr jid=%u use_count=%d Job=%s\n",
jcr->JobId, jcr->use_count(), jcr->Job);
#endif
lock_jcr_chain();
jcr->dec_use_count(); /* decrement use count */
- if (jcr->use_count() < 0) {
- Jmsg2(jcr, M_ERROR, 0, _("JCR use_count=%d JobId=%d\n"),
- jcr->use_count(), jcr->JobId);
- }
+ ASSERT2(jcr->use_count() >= 0, "JCR use_count < 0");
+ // Jmsg2(jcr, M_ERROR, 0, _("JCR use_count=%d JobId=%d\n"),
+ // jcr->use_count(), jcr->JobId);
+ //}
if (jcr->JobId > 0) {
- Dmsg3(dbglvl, "Dec free_jcr jid=%u use_count=%d Job=%s\n",
+ Dmsg3(dbglvl, "Dec free_jcr jid=%u use_count=%d Job=%s\n",
jcr->JobId, jcr->use_count(), jcr->Job);
}
if (jcr->use_count() > 0) { /* if in use */
return;
}
if (jcr->JobId > 0) {
- Dmsg3(dbglvl, "remove jcr jid=%u use_count=%d Job=%s\n",
+ Dmsg3(dbglvl, "remove jcr jid=%u use_count=%d Job=%s\n",
jcr->JobId, jcr->use_count(), jcr->Job);
}
+ jcr->exiting = true;
remove_jcr(jcr); /* remove Jcr from chain */
unlock_jcr_chain();
dequeue_messages(jcr);
+ close_msg(jcr); /* close messages for this job */
job_end_pop(jcr); /* pop and call hooked routines */
Dmsg1(dbglvl, "End job=%d\n", jcr->JobId);
free_common_jcr(jcr);
close_msg(NULL); /* flush any daemon messages */
- garbage_collect_memory_pool();
Dmsg0(dbglvl, "Exit free_jcr\n");
}
/*
 * If the JCR returned by get_jcr_from_tsd() for the calling thread
 * is jcr, replace the stored pointer with INVALID_JCR. Nothing is
 * done when a different JCR is stored.
 */
void remove_jcr_from_tsd(JCR *jcr)
{
JCR *tjcr = get_jcr_from_tsd();
- if (tjcr == jcr) {
- jcr->lock();
- jcr->my_thread_running = false;
- memset(&jcr->my_thread_id, 0, sizeof(jcr->my_thread_id));
- jcr->unlock();
+ if (tjcr == jcr) {
set_jcr_in_tsd(INVALID_JCR); /* only the stored pointer is replaced; the JCR itself is not modified here */
}
}
+void JCR::set_killable(bool killable) /* Store killable in my_thread_killable; presumably the flag is_killable() reports - confirm in jcr.h */
+{
+ lock(); /* the flag is written while holding this JCR's lock */
+ my_thread_killable = killable;
+ unlock();
+}
+
/*
* Put this jcr in the thread specific data (the jcr_key slot of the
* calling thread). A pthread_setspecific() failure is reported
* through Jmsg1() with M_ABORT.
*
* The removed two-argument form also updated my_thread_id and
* my_thread_running in the JCR when update_thread_info was true and
* the jcr was valid; the one-argument form only stores the pointer.
*/
-void set_jcr_in_tsd(JCR *jcr, bool update_thread_info)
+void set_jcr_in_tsd(JCR *jcr)
{
int status = pthread_setspecific(jcr_key, (void *)jcr);
if (status != 0) {
berrno be;
- Jmsg1(jcr, M_ABORT, 0, _("pthread_setspecific failed: ERR=%s\n"),
+ Jmsg1(jcr, M_ABORT, 0, _("pthread_setspecific failed: ERR=%s\n"),
be.bstrerror(status));
}
-
- /* We explicitly ask to set a jcr in tsd, we can update jcr->my_thread
- */
- if (update_thread_info && jcr && jcr != INVALID_JCR) {
- Dmsg2(100, "setting my_thread_stuffs 0x%p => 0x%p\n",
- jcr->my_thread_id, pthread_self());
- jcr->lock();
- //ASSERT(jcr->my_thread_running == false);
- jcr->my_thread_id = pthread_self();
- jcr->my_thread_running = true;
- jcr->unlock();
- }
}
/*
 * Send signal sig to the thread recorded in my_thread_id.
 *
 * Nothing is sent when the JCR is already marked exiting, when
 * is_killable() is false, or when my_thread_id is the calling thread.
 * The JCR chain lock and this JCR's own lock are both held while the
 * checks and the pthread_kill() are performed.
 */
void JCR::my_thread_send_signal(int sig)
{
+ lock_jcr_chain(); /* use global lock */
this->lock();
+ if (this->exiting) { /* free_jcr() sets this flag before removing the JCR from the chain */
+ goto get_out;
+ }
- if ( this->my_thread_running
- && !pthread_equal(this->my_thread_id, pthread_self()))
+ if (this->is_killable() &&
+ !pthread_equal(this->my_thread_id, pthread_self()))
{
Dmsg1(800, "Send kill to jid=%d\n", this->JobId);
pthread_kill(this->my_thread_id, sig);
+ this->exiting = true; /* later calls for this JCR take the early exit above */
- } else if (!this->my_thread_running) {
- Dmsg1(10, "Warning, can't send kill to jid=%d\n", this->JobId);
+ } else if (!this->is_killable()) {
+ Dmsg1(10, "Warning, cannot send kill to jid=%d marked not killable.\n", this->JobId);
}
+get_out:
this->unlock(); /* locks are released in the reverse order of acquisition */
+ unlock_jcr_chain();
}
/*
return jcr;
}
-
+
/*
* Find which JobId corresponds to the current thread
*/
foreach_jcr(jcr) {
if (jcr->JobId == JobId) {
jcr->inc_use_count();
- Dmsg3(dbglvl, "Inc get_jcr jid=%u use_count=%d Job=%s\n",
+ Dmsg3(dbglvl, "Inc get_jcr jid=%u use_count=%d Job=%s\n",
jcr->JobId, jcr->use_count(), jcr->Job);
break;
}
if (jcr->VolSessionId == SessionId &&
jcr->VolSessionTime == SessionTime) {
jcr->inc_use_count();
- Dmsg3(dbglvl, "Inc get_jcr jid=%u use_count=%d Job=%s\n",
+ Dmsg3(dbglvl, "Inc get_jcr jid=%u use_count=%d Job=%s\n",
jcr->JobId, jcr->use_count(), jcr->Job);
break;
}
foreach_jcr(jcr) {
if (strncmp(Job, jcr->Job, len) == 0) {
jcr->inc_use_count();
- Dmsg3(dbglvl, "Inc get_jcr jid=%u use_count=%d Job=%s\n",
+ Dmsg3(dbglvl, "Inc get_jcr jid=%u use_count=%d Job=%s\n",
jcr->JobId, jcr->use_count(), jcr->Job);
break;
}
foreach_jcr(jcr) {
if (strcmp(jcr->Job, Job) == 0) {
jcr->inc_use_count();
- Dmsg3(dbglvl, "Inc get_jcr jid=%u use_count=%d Job=%s\n",
+ Dmsg3(dbglvl, "Inc get_jcr jid=%u use_count=%d Job=%s\n",
jcr->JobId, jcr->use_count(), jcr->Job);
break;
}
enter_in_waittime = false; /* not a Wait situation */
break;
}
-
+
/*
* If we were previously waiting and are not any more
* we want to update the wait_time variable, which is
}
}
-/*
+/*
* Priority runs from 0 (lowest) to 10 (highest)
*/
static int get_status_priority(int JobStatus)
{
int priority = 0;
switch (JobStatus) {
+ case JS_Incomplete:
+ priority = 10;
+ break;
case JS_ErrorTerminated:
case JS_FatalError:
case JS_Canceled:
- case JS_Incomplete:
- priority = 10;
+ priority = 9;
break;
case JS_Error:
priority = 8;
return priority;
}
+/*
+ * Send Job status to Director
+ *
+ * Formats the current JobId and JobStatus with the Job_status
+ * message and sends it over dir_bsock.
+ * Returns the fsend() result, or true when dir_bsock is not set.
+ */
+bool JCR::sendJobStatus()
+{
+ if (dir_bsock) {
+ return dir_bsock->fsend(Job_status, JobId, JobStatus);
+ }
+ return true; /* no Director socket: nothing to send, treated as success */
+}
-void set_jcr_job_status(JCR *jcr, int JobStatus)
+/*
+ * Set and send Job status to Director
+ *
+ * When is_JobStatus(aJobStatus) is false (presumably: the current
+ * status differs from aJobStatus - confirm in jcr.h), the status is
+ * passed to setJobStatus() and then sent over dir_bsock if one is set.
+ * Returns the fsend() result when a message is sent, true otherwise.
+ */
+bool JCR::sendJobStatus(int aJobStatus)
{
- jcr->setJobStatus(JobStatus);
+ if (!is_JobStatus(aJobStatus)) {
+ setJobStatus(aJobStatus);
+ if (dir_bsock) {
+ return dir_bsock->fsend(Job_status, JobId, JobStatus); /* sends the member JobStatus, which setJobStatus() leaves unchanged when the old status has higher priority */
+ }
+ }
+ return true;
+}
+
+void JCR::setJobStarted() /* Mark the job as started and record the current time as its start time */
+{
+ job_started = true;
+ job_started_time = time(NULL); /* wall-clock seconds from time() */
}
+static pthread_mutex_t status_lock = PTHREAD_MUTEX_INITIALIZER;
+
/*
 * Propose newJobStatus as the status of this JCR.
 *
 * The new status replaces the current one only when its priority
 * (see get_status_priority()) is strictly higher, or when both the
 * old and the new priority are zero. update_wait_time() is called
 * with the proposed status in either case.
 */
void JCR::setJobStatus(int newJobStatus)
{
- JCR *jcr = this;
int priority, old_priority;
- int oldJobStatus = jcr->JobStatus;
+ int oldJobStatus = JobStatus;
+
+ P(status_lock); /* NOTE(review): oldJobStatus above is read before status_lock is taken - confirm that is intended */
priority = get_status_priority(newJobStatus);
old_priority = get_status_priority(oldJobStatus);
-
- Dmsg2(800, "set_jcr_job_status(%s, %c)\n", Job, newJobStatus);
+
+ Dmsg2(800, "set_jcr_job_status(%ld, %c)\n", JobId, newJobStatus);
/* Update wait_time depending on newJobStatus and oldJobStatus */
- update_wait_time(jcr, newJobStatus);
+ update_wait_time(this, newJobStatus);
/*
* For a set of errors, ... keep the current status
* so it isn't lost. For all others, set it.
*/
- Dmsg2(800, "OnEntry JobStatus=%c newJobstatus=%c\n", oldJobStatus, newJobStatus);
+ Dmsg2(800, "OnEntry JobStatus=%c newJobstatus=%c\n", (oldJobStatus==0)?'0':oldJobStatus, newJobStatus);
/*
* If the proposed new status has a higher priority than the
* current status, change it.
* If status priority == new priority and both are zero, take
- * the new status.
+ * the new status.
* If it is not zero, then we keep the first non-zero "error" that
* occurred.
*/
if (priority > old_priority || (
priority == 0 && old_priority == 0)) {
Dmsg4(800, "Set new stat. old: %c,%d new: %c,%d\n",
- jcr->JobStatus, old_priority, newJobStatus, priority);
- jcr->JobStatus = newJobStatus; /* replace with new status */
+ (oldJobStatus==0)?'0':oldJobStatus, old_priority, newJobStatus, priority);
+ JobStatus = newJobStatus; /* replace with new status */
}
- if (oldJobStatus != jcr->JobStatus) {
- Dmsg2(800, "leave set_job_status old=%c new=%c\n", oldJobStatus, newJobStatus);
-// generate_plugin_event(jcr, bEventStatusChange, NULL);
+ if (oldJobStatus != JobStatus) {
+ Dmsg2(800, "leave setJobStatus old=%c new=%c\n", (oldJobStatus==0)?'0':oldJobStatus, newJobStatus);
+// generate_plugin_event(this, bEventStatusChange, NULL);
}
+ V(status_lock);
}
#ifdef TRACE_JCR_CHAIN
* released with:
*
* free_jcr(jcr);
- *
+ *
*/
-JCR *jcr_walk_start()
+JCR *jcr_walk_start()
{
JCR *jcr;
lock_jcr_chain();
if (jcr) {
jcr->inc_use_count();
if (jcr->JobId > 0) {
- Dmsg3(dbglvl, "Inc walk_start jid=%u use_count=%d Job=%s\n",
+ Dmsg3(dbglvl, "Inc walk_start jid=%u use_count=%d Job=%s\n",
jcr->JobId, jcr->use_count(), jcr->Job);
}
}
if (jcr) {
jcr->inc_use_count();
if (jcr->JobId > 0) {
- Dmsg3(dbglvl, "Inc walk_next jid=%u use_count=%d Job=%s\n",
+ Dmsg3(dbglvl, "Inc walk_next jid=%u use_count=%d Job=%s\n",
jcr->JobId, jcr->use_count(), jcr->Job);
}
}
{
if (jcr) {
if (jcr->JobId > 0) {
- Dmsg3(dbglvl, "Free walk_end jid=%u use_count=%d Job=%s\n",
+ Dmsg3(dbglvl, "Free walk_end jid=%u use_count=%d Job=%s\n",
jcr->JobId, jcr->use_count(), jcr->Job);
}
free_jcr(jcr);
bs->set_timed_out();
Qmsg(jcr, M_ERROR, 0, _(
"Watchdog sending kill after %d secs to thread stalled reading Storage daemon.\n"),
- watchdog_time - timer_start);
- pthread_kill(jcr->my_thread_id, TIMEOUT_SIGNAL);
+ (int)(watchdog_time - timer_start));
+ jcr->my_thread_send_signal(TIMEOUT_SIGNAL);
}
}
bs = jcr->file_bsock;
bs->set_timed_out();
Qmsg(jcr, M_ERROR, 0, _(
"Watchdog sending kill after %d secs to thread stalled reading File daemon.\n"),
- watchdog_time - timer_start);
- pthread_kill(jcr->my_thread_id, TIMEOUT_SIGNAL);
+ (int)(watchdog_time - timer_start));
+ jcr->my_thread_send_signal(TIMEOUT_SIGNAL);
}
}
bs = jcr->dir_bsock;
bs->set_timed_out();
Qmsg(jcr, M_ERROR, 0, _(
"Watchdog sending kill after %d secs to thread stalled reading Director.\n"),
- watchdog_time - timer_start);
- pthread_kill(jcr->my_thread_id, TIMEOUT_SIGNAL);
+ (int)(watchdog_time - timer_start));
+ jcr->my_thread_send_signal(TIMEOUT_SIGNAL);
}
}
}
Dmsg0(dbglvl, "Finished JCR timeout checks\n");
}
-/*
- * Return next JobId from comma separated list
+/*
+ * Return next JobId from comma separated list
*
* Returns:
* 1 if next JobId returned
return; /* thus interrupting the function */
}
-/* Used to display specific daemon information after a fatal signal
- * (like B_DB in the director)
+/* Used to display specific daemon information after a fatal signal
+ * (like BDB in the director)
*/
#define MAX_DBG_HOOK 10
static dbg_jcr_hook_t *dbg_jcr_hooks[MAX_DBG_HOOK];
-static int dbg_jcr_handler_count;
+static int dbg_jcr_handler_count=0;
/*
 * Register a hook in the dbg_jcr_hooks[] table.
 *
 * The table holds at most MAX_DBG_HOOK entries. A hook registered
 * once the table is full is ignored (with a debug message) instead
 * of being written past the end of the array.
 */
void dbg_jcr_add_hook(dbg_jcr_hook_t *hook)
{
   if (dbg_jcr_handler_count >= MAX_DBG_HOOK) {
      Dmsg1(10, "Too many JCR debug hooks, max=%d. Hook ignored.\n", MAX_DBG_HOOK);
      return;
   }
   dbg_jcr_hooks[dbg_jcr_handler_count++] = hook;
}
+/* on win32, the pthread_t is a struct, so we don't display it */
+#ifdef HAVE_MINGW_W64
+# define get_threadid(a) (void *)0
+#else
+# define get_threadid(a) (void *)(a)
+#endif
/*
- * !!! WARNING !!!
+ * !!! WARNING !!!
*
* This function should be used ONLY after a fatal signal. We walk through the
* JCR chain without doing any lock, Bacula should not be running.
void dbg_print_jcr(FILE *fp)
{
char buf1[128], buf2[128], buf3[128], buf4[128];
+
if (!jcrs) {
return;
}
- fprintf(fp, "Attempt to dump current JCRs\n");
+ fprintf(fp, "Attempt to dump current JCRs. njcrs=%d\n", jcrs->size());
for (JCR *jcr = (JCR *)jcrs->first(); jcr ; jcr = (JCR *)jcrs->next(jcr)) {
- fprintf(fp, "JCR=%p JobId=%d name=%s JobStatus=%c\n",
- jcr, (int)jcr->JobId, jcr->Job, jcr->JobStatus);
- fprintf(fp, "\tuse_count=%i\n", jcr->use_count());
+ fprintf(fp, "threadid=%p JobId=%d JobStatus=%c jcr=%p name=%s\n",
+ get_threadid(jcr->my_thread_id), (int)jcr->JobId, jcr->JobStatus, jcr, jcr->Job);
+ fprintf(fp, "\tuse_count=%i killable=%d\n",
+ jcr->use_count(), jcr->is_killable());
fprintf(fp, "\tJobType=%c JobLevel=%c\n",
jcr->getJobType(), jcr->getJobLevel());
bstrftime(buf1, sizeof(buf1), jcr->sched_time);
bstrftime(buf4, sizeof(buf4), jcr->wait_time);
fprintf(fp, "\tsched_time=%s start_time=%s\n\tend_time=%s wait_time=%s\n",
buf1, buf2, buf3, buf4);
- fprintf(fp, "\tdb=%p db_batch=%p batch_started=%i\n",
+ fprintf(fp, "\tdb=%p db_batch=%p batch_started=%i\n",
jcr->db, jcr->db_batch, jcr->batch_started);
-
+
/*
* Call all the jcr debug hooks
*/