2 Bacula® - The Network Backup Solution
4 Copyright (C) 2000-2012 Free Software Foundation Europe e.V.
6 The main author of Bacula is Kern Sibbald, with contributions from
7 many others, a complete list can be found in the file AUTHORS.
8 This program is Free Software; you can redistribute it and/or
9 modify it under the terms of version three of the GNU Affero General Public
10 License as published by the Free Software Foundation and included
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU Affero General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23 Bacula® is a registered trademark of Kern Sibbald.
24 The licensor of Bacula is the Free Software Foundation Europe
25 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
26 Switzerland, email:ftf@fsfeurope.org.
29 * Manipulation routines for Job Control Records and
30 * handling of last_jobs_list.
32 * Kern E. Sibbald, December 2000
34 * These routines are thread safe.
36 * The job list routines were re-written in May 2005 to
37 * eliminate the global lock while traversing the list, and
38 * to use the dlist subroutines. The locking is now done
39 * on the list each time the list is modified or traversed.
40 * That is, it is "micro-locked" rather than globally locked.
41 * The result is that there is one lock/unlock for each entry
42 * in the list while traversing it rather than a single lock
43 * at the beginning of a traversal and one at the end. This
44 * incurs slightly more overhead, but effectively eliminates
45 * the possibility of race conditions. In addition, with the
46 * exception of the global locking of the list during the
47 * re-reading of the config file, no recursion is needed.
/* Debug level passed to the Dmsg calls in this file that trace JCR handling. */
54 const int dbglvl = 3400;
56 /* External variables we reference */
58 /* External referenced functions */
59 void free_bregexps(alist *bregexps);
61 /* Forward referenced functions */
/* Handler that new_jcr() installs for TIMEOUT_SIGNAL; declared with C linkage. */
62 extern "C" void timeout_handler(int sig);
/* Watchdog callback that init_jcr_subsystem() registers. */
63 static void jcr_timeout_check(watchdog_t *self);
/*
 * Entry points for locking the JCR chain.
 * With TRACE_JCR_CHAIN defined, lock_jcr_chain() and unlock_jcr_chain() are
 * macros that forward the call site (__FILE__, __LINE__) to the b_* tracing
 * functions; otherwise they are plain static functions.
 * The macros deliberately carry no trailing semicolon so that
 * "lock_jcr_chain();" expands to exactly one statement in both builds and
 * stays safe inside an unbraced if/else.
 */
64 #ifdef TRACE_JCR_CHAIN
65 static void b_lock_jcr_chain(const char *filen, int line);
66 static void b_unlock_jcr_chain(const char *filen, int line);
67 #define lock_jcr_chain() b_lock_jcr_chain(__FILE__, __LINE__)
68 #define unlock_jcr_chain() b_unlock_jcr_chain(__FILE__, __LINE__)
70 static void lock_jcr_chain();
71 static void unlock_jcr_chain();
/* List of struct s_last_job entries; filled by free_jcr() and read_last_jobs_list(). */
76 dlist *last_jobs = NULL;
/* Once last_jobs holds more entries than this, its first entry is removed. */
77 const int max_last_jobs = 10;
79 static dlist *jcrs = NULL; /* JCR chain */
/* NOTE(review): presumably the mutex behind lock_jcr_chain()/unlock_jcr_chain() -- confirm. */
80 static pthread_mutex_t jcr_lock = PTHREAD_MUTEX_INITIALIZER;
82 static pthread_mutex_t job_start_mutex = PTHREAD_MUTEX_INITIALIZER;
/* NOTE(review): presumably the mutex behind lock_last_jobs_list()/unlock_last_jobs_list() -- confirm. */
84 static pthread_mutex_t last_jobs_mutex = PTHREAD_MUTEX_INITIALIZER;
86 static pthread_key_t jcr_key; /* Pointer to jcr for each thread */
/* Passed to pthread_once() in new_jcr() so that create_jcr_key() runs a single time. */
88 pthread_once_t key_once = PTHREAD_ONCE_INIT;
/* Format used by JCR::sendJobStatus() when reporting Job name and status on dir_bsock. */
90 static char Job_status[] = "Status Job=%s JobStatus=%d\n";
/*
 * Allocate the last_jobs list and the JCR chain (jcrs).
 * Each dlist is constructed from a typed NULL pointer together with the
 * address of that type's link member.
 * NOTE(review): presumably each allocation happens only when the list does
 * not exist yet -- confirm.
 */
103 void init_last_jobs_list()
106 struct s_last_job *job_entry = NULL;
108 last_jobs = New(dlist(job_entry, &job_entry->link));
111 jcrs = New(dlist(jcr, &jcr->link));
/*
 * Empty the last_jobs list.
 * While holding the last-jobs lock, the first entry is fetched and unlinked
 * repeatedly until the list reports empty.
 */
115 void term_last_jobs_list()
118 lock_last_jobs_list();
119 while (!last_jobs->empty()) {
120 void *je = last_jobs->first();
121 last_jobs->remove(je);
126 unlock_last_jobs_list();
/*
 * Load saved last-job entries from a file into last_jobs.
 *   fd   - descriptor to read from
 *   addr - byte offset of the saved list; addr == 0 is handled by the same
 *          branch as an lseek() failure
 * The stored data is a count (num) followed by that many raw
 * struct s_last_job records.  A count above 4 * max_last_jobs trips the
 * sanity check.  Under the last-jobs lock each record is copied into a
 * heap-allocated entry and appended to last_jobs; once the list holds more
 * than max_last_jobs entries its first entry is unlinked.
 * NOTE(review): the malloc() result is handed to memcpy() without a NULL
 * check.
 */
134 bool read_last_jobs_list(int fd, uint64_t addr)
136 struct s_last_job *je, job;
140 Dmsg1(100, "read_last_jobs seek to %d\n", (int)addr);
141 if (addr == 0 || lseek(fd, (boffset_t)addr, SEEK_SET) < 0) {
144 if (read(fd, &num, sizeof(num)) != sizeof(num)) {
147 Dmsg1(100, "Read num_items=%d\n", num);
148 if (num > 4 * max_last_jobs) { /* sanity check */
151 lock_last_jobs_list();
152 for ( ; num; num--) {
153 if (read(fd, &job, sizeof(job)) != sizeof(job)) {
155 Pmsg1(000, "Read job entry. ERR=%s\n", be.bstrerror());
160 je = (struct s_last_job *)malloc(sizeof(struct s_last_job));
161 memcpy((char *)je, (char *)&job, sizeof(job));
163 init_last_jobs_list();
165 last_jobs->append(je);
166 if (last_jobs->size() > max_last_jobs) {
167 je = (struct s_last_job *)last_jobs->first();
168 last_jobs->remove(je);
173 unlock_last_jobs_list();
/*
 * Save the last_jobs list to a file.
 *   fd   - descriptor to write to
 *   addr - byte offset at which writing starts (reached with lseek)
 * Under the last-jobs lock the entry count is written first, followed by
 * every struct s_last_job as raw bytes; a short write is reported through
 * Pmsg1.  After the lock is released the current file offset is obtained
 * with lseek(fd, 0, SEEK_CUR).
 * NOTE(review): the return value is presumably that offset -- confirm.
 */
177 uint64_t write_last_jobs_list(int fd, uint64_t addr)
179 struct s_last_job *je;
183 Dmsg1(100, "write_last_jobs seek to %d\n", (int)addr);
184 if (lseek(fd, (boffset_t)addr, SEEK_SET) < 0) {
188 lock_last_jobs_list();
189 /* First record is number of entries */
190 num = last_jobs->size();
191 if (write(fd, &num, sizeof(num)) != sizeof(num)) {
193 Pmsg1(000, "Error writing num_items: ERR=%s\n", be.bstrerror());
196 foreach_dlist(je, last_jobs) {
197 if (write(fd, je, sizeof(struct s_last_job)) != sizeof(struct s_last_job)) {
199 Pmsg1(000, "Error writing job: ERR=%s\n", be.bstrerror());
203 unlock_last_jobs_list();
205 /* Return current address */
206 stat = lseek(fd, 0, SEEK_CUR);
213 unlock_last_jobs_list();
/*
 * Take the lock that callers in this file hold around every access to
 * last_jobs.  Paired with unlock_last_jobs_list().
 */
217 void lock_last_jobs_list()
/* Release the lock taken by lock_last_jobs_list(). */
222 void unlock_last_jobs_list()
/*
 * Get an ASCII representation of the Operation being performed as an English Noun.
 * Every result is passed through _() and the fallback is "Unknown operation".
 * NOTE(review): the selector is presumably the job type -- confirm.
 */
228 const char *JCR::get_OperationName()
234 return _("Verifying");
236 return _("Restoring");
238 return _("Archiving");
242 return _("Migration");
244 return _("Scanning");
246 return _("Unknown operation");
/*
 * Get an ASCII representation of the Action being performed, either an English Verb or Adjective.
 *   past - true selects the past-tense form ("verified"), false the base
 *          form ("verify")
 * Every result is passed through _() and the fallback is "unknown action".
 */
251 const char *JCR::get_ActionName(bool past)
257 return (past == true) ? _("verified") : _("verify");
259 return (past == true) ? _("restored") : _("restore");
261 return (past == true) ? _("archived") : _("archive");
263 return (past == true) ? _("copied") : _("copy");
265 return (past == true) ? _("migrated") : _("migrate");
267 return (past == true) ? _("scanned") : _("scan");
269 return _("unknown action");
/* Special case for the Virtual Full level; NOTE(review): the enclosing function is not visible here -- confirm its purpose. */
282 if (m_JobLevel == L_VIRTUAL_FULL) {
293 * Push a subroutine address into the job end callback stack
/*
 * Register a callback to run when the job ends.
 *   jcr        - job whose callback list is extended
 *   job_end_cb - function to call; it receives the jcr and ctx
 *   ctx        - opaque pointer handed back to job_end_cb
 * The callback and its context are stored as two consecutive entries of
 * jcr->job_end_push, callback first.
 */
295 void job_end_push(JCR *jcr, void job_end_cb(JCR *jcr,void *), void *ctx)
297 jcr->job_end_push.append((void *)job_end_cb);
298 jcr->job_end_push.append(ctx);
/*
 * Pop each job_end subroutine and call it.
 * Entries were stored as (callback, ctx) pairs, so the list is walked from
 * the last index downwards taking the ctx first and then the callback; the
 * most recently pushed callback therefore runs first.
 */
302 static void job_end_pop(JCR *jcr)
304 void (*job_end_cb)(JCR *jcr, void *ctx);
306 for (int i=jcr->job_end_push.size()-1; i > 0; ) {
307 ctx = jcr->job_end_push.get(i--);
308 job_end_cb = (void (*)(JCR *,void *))jcr->job_end_push.get(i--);
309 job_end_cb(jcr, ctx);
314 * Create thread key for thread specific data
/*
 * Create jcr_key, the thread-specific-data key under which each thread keeps
 * its JCR pointer.  No destructor is registered (second argument is NULL).
 * The failure case issues an M_ABORT message carrying the error text for
 * the returned status.  new_jcr() runs this through pthread_once().
 */
316 void create_jcr_key()
318 int status = pthread_key_create(&jcr_key, NULL);
321 Jmsg1(NULL, M_ABORT, 0, _("pthread key create failed: ERR=%s\n"),
322 be.bstrerror(status));
327 * Create a Job Control Record and link it into JCR chain
328 * Returns newly allocated JCR
329 * Note, since each daemon has a different JCR, he passes
/*
 * Allocate and initialize a Job Control Record.
 *   size            - number of bytes to allocate for the record; the whole
 *                     area is zeroed
 *   daemon_free_jcr - daemon-specific free routine stored in the JCR and
 *                     later called by free_jcr()
 * The thread-specific key is created once via pthread_once().  The record
 * gets a message queue with its mutex, a job-end callback list, pool buffers
 * for VolumeName, errmsg and comment, the placeholder Job name "*System*",
 * type JT_SYSTEM, level L_NONE and status JS_Created, and one use-count
 * reference.  timeout_handler is installed for TIMEOUT_SIGNAL with all
 * signals blocked while it runs.
 * NOTE(review): the malloc() result is passed to memset() without a NULL
 * check.
 */
332 JCR *new_jcr(int size, JCR_free_HANDLER *daemon_free_jcr)
335 MQUEUE_ITEM *item = NULL;
336 struct sigaction sigtimer;
339 Dmsg0(dbglvl, "Enter new_jcr\n");
340 status = pthread_once(&key_once, create_jcr_key);
343 Jmsg1(NULL, M_ABORT, 0, _("pthread_once failed. ERR=%s\n"), be.bstrerror(status));
345 jcr = (JCR *)malloc(size);
346 memset(jcr, 0, size);
347 jcr->msg_queue = New(dlist(item, &item->link));
348 if ((status = pthread_mutex_init(&jcr->msg_queue_mutex, NULL)) != 0) {
350 Jmsg(NULL, M_ABORT, 0, _("Could not init msg_queue mutex. ERR=%s\n"),
351 be.bstrerror(status));
353 jcr->job_end_push.init(1, false);
354 jcr->sched_time = time(NULL);
355 jcr->daemon_free_jcr = daemon_free_jcr; /* plug daemon free routine */
/* The creator holds the first reference; free_jcr() drops it. */
357 jcr->inc_use_count();
358 jcr->VolumeName = get_pool_memory(PM_FNAME);
359 jcr->VolumeName[0] = 0;
360 jcr->errmsg = get_pool_memory(PM_MESSAGE);
362 jcr->comment = get_pool_memory(PM_FNAME);
364 /* Setup some dummy values */
365 bstrncpy(jcr->Job, "*System*", sizeof(jcr->Job));
367 jcr->setJobType(JT_SYSTEM); /* internal job until defined */
368 jcr->setJobLevel(L_NONE);
369 jcr->setJobStatus(JS_Created); /* ready to run */
370 sigtimer.sa_flags = 0;
371 sigtimer.sa_handler = timeout_handler;
372 sigfillset(&sigtimer.sa_mask);
373 sigaction(TIMEOUT_SIGNAL, &sigtimer, NULL);
376 * Locking jobs is a global lock that is needed
377 * so that the Director can stop new jobs from being
378 * added to the jcr chain while it processes a new
379 * conf file and does the job_end_push().
384 jcrs = New(dlist(jcr, &jcr->link));
395 * Remove a JCR from the chain
396 * NOTE! The chain must be locked prior to calling
/*
 * Unlink a JCR from the chain; entry and exit are traced at dbglvl.
 * An M_ABORT message "NULL jcr." exists for the invalid-argument case.
 * NOTE(review): the caller is expected to hold the chain lock -- confirm at
 * the call sites.
 */
399 static void remove_jcr(JCR *jcr)
401 Dmsg0(dbglvl, "Enter remove_jcr\n");
403 Emsg0(M_ABORT, 0, _("NULL jcr.\n"));
406 Dmsg0(dbglvl, "Leave remove_jcr\n");
410 * Free stuff common to all JCRs. N.B. Be careful to include only
411 * generic stuff in the common part of the jcr.
/*
 * Release everything held in the daemon-independent part of a JCR.
 * The JCR is detached from thread-specific data and marked not killable,
 * its mutex and message queue (with the queue mutex) are destroyed and its
 * messages closed.  Then each optional member (client_name, attr,
 * sd_auth_key, VolumeName, dir_bsock, errmsg, RegexWhere, where_bregexp,
 * cached_path, id_list, comment) is released with the routine matching how
 * it was obtained, and the visible pointer members are reset to NULL.
 */
413 static void free_common_jcr(JCR *jcr)
415 /* Uses jcr lock/unlock */
416 remove_jcr_from_tsd(jcr);
417 jcr->set_killable(false);
419 jcr->destroy_mutex();
421 if (jcr->msg_queue) {
422 delete jcr->msg_queue;
423 jcr->msg_queue = NULL;
424 pthread_mutex_destroy(&jcr->msg_queue_mutex);
426 close_msg(jcr); /* close messages for this job */
428 /* do this after closing messages */
429 if (jcr->client_name) {
430 free_pool_memory(jcr->client_name);
431 jcr->client_name = NULL;
435 free_pool_memory(jcr->attr);
439 if (jcr->sd_auth_key) {
440 free(jcr->sd_auth_key);
441 jcr->sd_auth_key = NULL;
443 if (jcr->VolumeName) {
444 free_pool_memory(jcr->VolumeName);
445 jcr->VolumeName = NULL;
448 if (jcr->dir_bsock) {
449 bnet_close(jcr->dir_bsock);
450 jcr->dir_bsock = NULL;
453 free_pool_memory(jcr->errmsg);
460 if (jcr->RegexWhere) {
461 free(jcr->RegexWhere);
462 jcr->RegexWhere = NULL;
/* The list elements are released first, then the list object itself. */
464 if (jcr->where_bregexp) {
465 free_bregexps(jcr->where_bregexp);
466 delete jcr->where_bregexp;
467 jcr->where_bregexp = NULL;
469 if (jcr->cached_path) {
470 free_pool_memory(jcr->cached_path);
471 jcr->cached_path = NULL;
475 free_guid_list(jcr->id_list);
479 free_pool_memory(jcr->comment);
486 * Global routine to free a jcr
/*
 * Drop one reference to a JCR and destroy it when no reference remains.
 * Two entry headers are present: b_free_jcr(), which also traces the calling
 * file and line, and free_jcr().
 * NOTE(review): presumably a preprocessor conditional selects one of them
 * and both share the body below -- confirm.
 *
 * The use count is decremented; a negative count is reported as an error and
 * a positive count means the JCR is still in use.  Otherwise the JCR is
 * removed from the chain, queued messages are dequeued, the job-end
 * callbacks run, and (for JobId > 0) a summary entry is appended to
 * last_jobs under the last-jobs lock, dropping the first entry once the list
 * exceeds max_last_jobs.  Finally the daemon free routine, if set, and
 * free_common_jcr() are called and daemon messages are flushed.
 * NOTE(review): the malloc() result for the summary entry is passed to
 * memset() without a NULL check.
 */
489 void b_free_jcr(const char *file, int line, JCR *jcr)
491 struct s_last_job *je;
493 Dmsg3(dbglvl, "Enter free_jcr jid=%u from %s:%d\n", jcr->JobId, file, line);
497 void free_jcr(JCR *jcr)
499 struct s_last_job *je;
501 Dmsg3(dbglvl, "Enter free_jcr jid=%u use_count=%d Job=%s\n",
502 jcr->JobId, jcr->use_count(), jcr->Job);
507 jcr->dec_use_count(); /* decrement use count */
508 if (jcr->use_count() < 0) {
509 Jmsg2(jcr, M_ERROR, 0, _("JCR use_count=%d JobId=%d\n"),
510 jcr->use_count(), jcr->JobId);
512 if (jcr->JobId > 0) {
513 Dmsg3(dbglvl, "Dec free_jcr jid=%u use_count=%d Job=%s\n",
514 jcr->JobId, jcr->use_count(), jcr->Job);
516 if (jcr->use_count() > 0) { /* if in use */
520 if (jcr->JobId > 0) {
521 Dmsg3(dbglvl, "remove jcr jid=%u use_count=%d Job=%s\n",
522 jcr->JobId, jcr->use_count(), jcr->Job);
524 remove_jcr(jcr); /* remove Jcr from chain */
527 dequeue_messages(jcr);
528 job_end_pop(jcr); /* pop and call hooked routines */
530 Dmsg1(dbglvl, "End job=%d\n", jcr->JobId);
532 /* Keep some statistics */
533 switch (jcr->getJobType()) {
540 /* Keep list of last jobs, but not Console where JobId==0 */
541 if (jcr->JobId > 0) {
542 lock_last_jobs_list();
544 je = (struct s_last_job *)malloc(sizeof(struct s_last_job));
545 memset(je, 0, sizeof(struct s_last_job)); /* zero in case unset fields */
546 je->Errors = jcr->JobErrors;
547 je->JobType = jcr->getJobType();
548 je->JobId = jcr->JobId;
549 je->VolSessionId = jcr->VolSessionId;
550 je->VolSessionTime = jcr->VolSessionTime;
551 bstrncpy(je->Job, jcr->Job, sizeof(je->Job));
552 je->JobFiles = jcr->JobFiles;
553 je->JobBytes = jcr->JobBytes;
554 je->JobStatus = jcr->JobStatus;
555 je->JobLevel = jcr->getJobLevel();
556 je->start_time = jcr->start_time;
/* The end time is the moment the JCR is freed, not a value stored in the JCR. */
557 je->end_time = time(NULL);
560 init_last_jobs_list();
562 last_jobs->append(je);
563 if (last_jobs->size() > max_last_jobs) {
564 je = (struct s_last_job *)last_jobs->first();
565 last_jobs->remove(je);
568 unlock_last_jobs_list();
575 if (jcr->daemon_free_jcr) {
576 jcr->daemon_free_jcr(jcr); /* call daemon free routine */
579 free_common_jcr(jcr);
580 close_msg(NULL); /* flush any daemon messages */
581 Dmsg0(dbglvl, "Exit free_jcr\n");
585 * Remove jcr from thread specific data, but
586 * make sure it is us who are attached.
/*
 * Detach a JCR from the calling thread.
 * The JCR currently attached to this thread is looked up and the
 * thread-specific slot is set to INVALID_JCR.
 * NOTE(review): presumably the slot is only reset when the attached JCR is
 * the one passed in -- confirm.
 */
588 void remove_jcr_from_tsd(JCR *jcr)
590 JCR *tjcr = get_jcr_from_tsd();
592 set_jcr_in_tsd(INVALID_JCR);
/*
 * Record whether the thread running this job may be sent signals.
 *   killable - new value of my_thread_killable
 * my_thread_id is either set to the calling thread (pthread_self()) or
 * zero-filled.
 * NOTE(review): presumably the id is recorded when killable is true and
 * cleared otherwise -- confirm.
 */
596 void JCR::set_killable(bool killable)
600 jcr->my_thread_killable = killable;
602 jcr->my_thread_id = pthread_self();
604 memset(&jcr->my_thread_id, 0, sizeof(jcr->my_thread_id));
610 * Put this jcr in the thread specific data
611 * if update_thread_info is true and the jcr is valid,
612 * we update the my_thread_id in the JCR
/*
 * Store jcr in the calling thread's thread-specific slot (jcr_key).
 * The failure case of pthread_setspecific() issues an M_ABORT message
 * carrying the error text for the returned status.
 */
614 void set_jcr_in_tsd(JCR *jcr)
616 int status = pthread_setspecific(jcr_key, (void *)jcr);
619 Jmsg1(jcr, M_ABORT, 0, _("pthread_setspecific failed: ERR=%s\n"),
620 be.bstrerror(status));
/*
 * Send signal sig to the thread recorded in my_thread_id.
 * The signal is sent only when the job is killable and that thread is not
 * the caller itself.  When the job is not killable a debug warning is
 * logged instead; when the target is the calling thread nothing happens.
 */
624 void JCR::my_thread_send_signal(int sig)
627 if (this->is_killable() &&
628 !pthread_equal(this->my_thread_id, pthread_self()))
630 Dmsg1(800, "Send kill to jid=%d\n", this->JobId);
631 pthread_kill(this->my_thread_id, sig);
633 } else if (!this->is_killable()) {
634 Dmsg1(10, "Warning, can't send kill to jid=%d\n", this->JobId);
640 * Give me the jcr that is attached to this thread
/*
 * Return the JCR stored in the calling thread's thread-specific slot.
 * The INVALID_JCR marker written by remove_jcr_from_tsd() is recognised
 * here so that callers see NULL instead (see the comment below).
 */
642 JCR *get_jcr_from_tsd()
644 JCR *jcr = (JCR *)pthread_getspecific(jcr_key);
645 // printf("get_jcr_from_tsd: jcr=%p\n", jcr);
646 /* set any INVALID_JCR to NULL which the rest of Bacula understands */
647 if (jcr == INVALID_JCR) {
655 * Find which JobId corresponds to the current thread
/*
 * Return the JobId of the JCR attached to the calling thread, obtained via
 * get_jcr_from_tsd().
 * NOTE(review): the value returned when no JCR is attached is not visible
 * here -- confirm.
 */
657 uint32_t get_jobid_from_tsd()
661 jcr = get_jcr_from_tsd();
662 // printf("get_jobid_from_tsr: jcr=%p\n", jcr);
664 JobId = (uint32_t)jcr->JobId;
670 * Given a JobId, find the JCR
671 * Returns: jcr on success
/*
 * Look up the JCR whose JobId equals the argument.
 * A match has its use count incremented before it is handed out, so the
 * caller owns a reference that free_jcr() drops.
 */
674 JCR *get_jcr_by_id(uint32_t JobId)
679 if (jcr->JobId == JobId) {
680 jcr->inc_use_count();
681 Dmsg3(dbglvl, "Inc get_jcr jid=%u use_count=%d Job=%s\n",
682 jcr->JobId, jcr->use_count(), jcr->Job);
691 * Given a thread id, find the JobId
692 * Returns: JobId on success
/*
 * Find the job whose recorded thread id (my_thread_id) equals tid, compared
 * with pthread_equal().
 */
695 uint32_t get_jobid_from_tid(pthread_t tid)
701 if (pthread_equal(jcr->my_thread_id, tid)) {
715 * Given a SessionId and SessionTime, find the JCR
716 * Returns: jcr on success
/*
 * Look up the JCR whose VolSessionId and VolSessionTime both equal the
 * arguments.  A match has its use count incremented before it is handed
 * out, so the caller owns a reference that free_jcr() drops.
 */
719 JCR *get_jcr_by_session(uint32_t SessionId, uint32_t SessionTime)
724 if (jcr->VolSessionId == SessionId &&
725 jcr->VolSessionTime == SessionTime) {
726 jcr->inc_use_count();
727 Dmsg3(dbglvl, "Inc get_jcr jid=%u use_count=%d Job=%s\n",
728 jcr->JobId, jcr->use_count(), jcr->Job);
738 * Given a Job, find the JCR
739 * compares on the number of characters in Job
740 * thus allowing partial matches.
741 * Returns: jcr on success
/*
 * Look up a JCR whose Job name begins with the given text: only the first
 * len characters are compared with strncmp().
 * NOTE(review): len is presumably strlen(Job) -- confirm.
 * A match has its use count incremented before it is handed out, so the
 * caller owns a reference that free_jcr() drops.
 */
744 JCR *get_jcr_by_partial_name(char *Job)
754 if (strncmp(Job, jcr->Job, len) == 0) {
755 jcr->inc_use_count();
756 Dmsg3(dbglvl, "Inc get_jcr jid=%u use_count=%d Job=%s\n",
757 jcr->JobId, jcr->use_count(), jcr->Job);
767 * Given a Job, find the JCR
768 * requires an exact match of names.
769 * Returns: jcr on success
/*
 * Look up the JCR whose Job name is exactly equal (strcmp) to the argument.
 * A match has its use count incremented before it is handed out, so the
 * caller owns a reference that free_jcr() drops.
 */
772 JCR *get_jcr_by_full_name(char *Job)
780 if (strcmp(jcr->Job, Job) == 0) {
781 jcr->inc_use_count();
782 Dmsg3(dbglvl, "Inc get_jcr jid=%u use_count=%d Job=%s\n",
783 jcr->JobId, jcr->use_count(), jcr->Job);
/*
 * Keep the wait-time bookkeeping of a JCR in step with a status change.
 *   jcr          - job being updated; its current JobStatus is the old state
 *   newJobStatus - status about to be set
 * The new status is first classified as a wait state or not.  If the old
 * status was a wait state and the new one is not, the time spent waiting
 * (now - wait_time) is added to wait_time_sum.  If the new status is a wait
 * state, wait_time is set to the current time.
 */
791 static void update_wait_time(JCR *jcr, int newJobStatus)
793 bool enter_in_waittime;
794 int oldJobStatus = jcr->JobStatus;
796 switch (newJobStatus) {
801 case JS_WaitStoreRes:
803 case JS_WaitClientRes:
805 case JS_WaitPriority:
806 enter_in_waittime = true;
809 enter_in_waittime = false; /* not a Wait situation */
814 * If we were previously waiting and are not any more
815 * we want to update the wait_time variable, which is
816 * the start of waiting.
818 switch (oldJobStatus) {
823 case JS_WaitStoreRes:
825 case JS_WaitClientRes:
827 case JS_WaitPriority:
828 if (!enter_in_waittime) { /* we get out the wait time */
829 jcr->wait_time_sum += (time(NULL) - jcr->wait_time);
834 /* if wait state is new, we keep current time for watchdog MaxWaitTime */
836 if (enter_in_waittime) {
837 jcr->wait_time = time(NULL);
844 * Priority runs from 0 (lowest) to 10 (highest)
/*
 * Map a job status code to an integer priority.  JCR::setJobStatus()
 * compares the priorities of the old and the proposed status to decide
 * whether the stored status is replaced.
 */
846 static int get_status_priority(int JobStatus)
853 case JS_ErrorTerminated:
869 * Send Job status to Director
/*
 * Report the current job status on dir_bsock.
 * When dir_bsock is set, the Job name and JobStatus are sent with the
 * Job_status format and the result of fsend() is returned.
 */
871 bool JCR::sendJobStatus()
874 if (jcr->dir_bsock) {
875 return jcr->dir_bsock->fsend(Job_status, jcr->Job, jcr->JobStatus);
881 * Set and send Job status to Director
/*
 * Set a new job status and report it on dir_bsock.
 *   newJobStatus - status to apply through setJobStatus()
 * Nothing is set or sent when the job already has that status.  After the
 * update the status actually stored in the JCR is the one sent, which can
 * differ from newJobStatus when setJobStatus() keeps the previous value.
 */
883 bool JCR::sendJobStatus(int newJobStatus)
886 if (!jcr->is_JobStatus(newJobStatus)) {
887 setJobStatus(newJobStatus);
888 if (jcr->dir_bsock) {
889 return jcr->dir_bsock->fsend(Job_status, jcr->Job, jcr->JobStatus);
/*
 * Change the job status, subject to status priorities.
 *   newJobStatus - proposed status
 * The wait-time bookkeeping is updated first.  The stored status is then
 * replaced only when the proposed status has a higher priority than the
 * current one, or when both priorities are zero; otherwise the current
 * status is kept.
 */
896 void JCR::setJobStatus(int newJobStatus)
899 int priority, old_priority;
900 int oldJobStatus = jcr->JobStatus;
901 priority = get_status_priority(newJobStatus);
902 old_priority = get_status_priority(oldJobStatus);
904 Dmsg2(800, "set_jcr_job_status(%s, %c)\n", Job, newJobStatus);
906 /* Update wait_time depending on newJobStatus and oldJobStatus */
907 update_wait_time(jcr, newJobStatus);
910 * For a set of errors, ... keep the current status
911 * so it isn't lost. For all others, set it.
913 Dmsg2(800, "OnEntry JobStatus=%c newJobstatus=%c\n", oldJobStatus, newJobStatus);
915 * If status priority is > than proposed new status, change it.
916 * If status priority == new priority and both are zero, take
918 * If it is not zero, then we keep the first non-zero "error" that
/* Replace when the new status outranks the old one, or when neither has a priority. */
921 if (priority > old_priority || (
922 priority == 0 && old_priority == 0)) {
923 Dmsg4(800, "Set new stat. old: %c,%d new: %c,%d\n",
924 jcr->JobStatus, old_priority, newJobStatus, priority);
925 jcr->JobStatus = newJobStatus; /* replace with new status */
928 if (oldJobStatus != jcr->JobStatus) {
929 Dmsg2(800, "leave setJobStatus old=%c new=%c\n", oldJobStatus, newJobStatus);
930 // generate_plugin_event(jcr, bEventStatusChange, NULL);
/*
 * Lock the JCR chain.
 * With TRACE_JCR_CHAIN defined the function is b_lock_jcr_chain(), which
 * takes the calling file and line, keeps a running lock_count and traces
 * each lock at dbglvl; otherwise it is the plain lock_jcr_chain().
 */
934 #ifdef TRACE_JCR_CHAIN
935 static int lock_count = 0;
941 #ifdef TRACE_JCR_CHAIN
942 static void b_lock_jcr_chain(const char *fname, int line)
944 static void lock_jcr_chain()
947 #ifdef TRACE_JCR_CHAIN
948 Dmsg3(dbglvl, "Lock jcr chain %d from %s:%d\n", ++lock_count, fname, line);
/*
 * Unlock the JCR chain.
 * With TRACE_JCR_CHAIN defined the function is b_unlock_jcr_chain(), which
 * traces the calling file and line at dbglvl and decrements lock_count
 * after printing it; otherwise it is the plain unlock_jcr_chain().
 */
956 #ifdef TRACE_JCR_CHAIN
957 static void b_unlock_jcr_chain(const char *fname, int line)
959 static void unlock_jcr_chain()
962 #ifdef TRACE_JCR_CHAIN
963 Dmsg3(dbglvl, "Unlock jcr chain %d from %s:%d\n", lock_count--, fname, line);
969 * Start walk of jcr chain
970 * The proper way to walk the jcr chain is:
977 * It is possible to leave out the endeach_jcr(jcr), but
978 * in that case, the last jcr referenced must be explicitly
/*
 * Begin a walk of the JCR chain with its first entry.
 * The entry has its use count incremented, so the walker holds a reference
 * until it moves on or ends the walk.  The increment is traced only for
 * JobId > 0.
 */
984 JCR *jcr_walk_start()
988 jcr = (JCR *)jcrs->first();
990 jcr->inc_use_count();
991 if (jcr->JobId > 0) {
992 Dmsg3(dbglvl, "Inc walk_start jid=%u use_count=%d Job=%s\n",
993 jcr->JobId, jcr->use_count(), jcr->Job);
1001 * Get next jcr from chain, and release current one
/*
 * Advance a chain walk to the entry after prev_jcr.
 * The new entry has its use count incremented; the increment is traced only
 * for JobId > 0.
 * NOTE(review): presumably the reference on prev_jcr is released here as
 * well -- confirm.
 */
1003 JCR *jcr_walk_next(JCR *prev_jcr)
1008 jcr = (JCR *)jcrs->next(prev_jcr);
1010 jcr->inc_use_count();
1011 if (jcr->JobId > 0) {
1012 Dmsg3(dbglvl, "Inc walk_next jid=%u use_count=%d Job=%s\n",
1013 jcr->JobId, jcr->use_count(), jcr->Job);
1024 * Release last jcr referenced
/*
 * End a chain walk on the given entry; the release is traced for JobId > 0.
 * NOTE(review): presumably the walker's reference is dropped with
 * free_jcr() -- confirm.
 */
1026 void jcr_walk_end(JCR *jcr)
1029 if (jcr->JobId > 0) {
1030 Dmsg3(dbglvl, "Free walk_end jid=%u use_count=%d Job=%s\n",
1031 jcr->JobId, jcr->use_count(), jcr->Job);
1038 * Return number of Jobs
1046 for (jcr = (JCR *)jcrs->first(); (jcr = (JCR *)jcrs->next(jcr)); ) {
1047 if (jcr->JobId > 0) {
1057 * Setup to call the timeout check routine every 30 seconds
1058 * This routine will check any timers that have been enabled.
/*
 * Set up periodic JCR timeout checking.
 * A watchdog is created, configured as repeating (not one-shot) with a
 * 30 second interval and jcr_timeout_check() as its callback, and then
 * registered.
 */
1060 bool init_jcr_subsystem(void)
1062 watchdog_t *wd = new_watchdog();
1064 wd->one_shot = false;
1065 wd->interval = 30; /* FIXME: should be configurable somewhere, even
1066 if only with a #define */
1067 wd->callback = jcr_timeout_check;
1069 register_watchdog(wd);
/*
 * Watchdog callback: look for jobs blocked on a socket for too long.
 *   self - the watchdog that fired
 * For each JCR the Storage daemon, File daemon and Director sockets
 * (store_bsock, file_bsock, dir_bsock) are checked in turn.  A socket whose
 * timer is running and has been running longer than its timeout gets its
 * timer switched off and is flagged as timed out; an error message is queued
 * and TIMEOUT_SIGNAL is sent to the job thread.  JCRs with JobId == 0 are
 * treated separately by the test at the top of the loop.
 * The trace message prints the jcr pointer with %p so that the conversion
 * matches the argument type.
 * NOTE(review): the "%d secs" messages receive a time difference; confirm
 * that its type matches %d.
 */
1074 static void jcr_timeout_check(watchdog_t *self)
1080 Dmsg0(dbglvl, "Start JCR timeout checks\n");
1082 /* Walk through all JCRs checking if any one is
1083 * blocked for more than specified max time.
1086 Dmsg2(dbglvl, "jcr_timeout_check JobId=%u jcr=%p\n", jcr->JobId, jcr);
1087 if (jcr->JobId == 0) {
1090 bs = jcr->store_bsock;
1092 timer_start = bs->timer_start;
1093 if (timer_start && (watchdog_time - timer_start) > bs->timeout) {
1094 bs->timer_start = 0; /* turn off timer */
1095 bs->set_timed_out();
1096 Qmsg(jcr, M_ERROR, 0, _(
1097 "Watchdog sending kill after %d secs to thread stalled reading Storage daemon.\n"),
1098 watchdog_time - timer_start);
1099 jcr->my_thread_send_signal(TIMEOUT_SIGNAL);
1102 bs = jcr->file_bsock;
1104 timer_start = bs->timer_start;
1105 if (timer_start && (watchdog_time - timer_start) > bs->timeout) {
1106 bs->timer_start = 0; /* turn off timer */
1107 bs->set_timed_out();
1108 Qmsg(jcr, M_ERROR, 0, _(
1109 "Watchdog sending kill after %d secs to thread stalled reading File daemon.\n"),
1110 watchdog_time - timer_start);
1111 jcr->my_thread_send_signal(TIMEOUT_SIGNAL);
1114 bs = jcr->dir_bsock;
1116 timer_start = bs->timer_start;
1117 if (timer_start && (watchdog_time - timer_start) > bs->timeout) {
1118 bs->timer_start = 0; /* turn off timer */
1119 bs->set_timed_out();
1120 Qmsg(jcr, M_ERROR, 0, _(
1121 "Watchdog sending kill after %d secs to thread stalled reading Director.\n"),
1122 watchdog_time - timer_start);
1123 jcr->my_thread_send_signal(TIMEOUT_SIGNAL);
1129 Dmsg0(dbglvl, "Finished JCR timeout checks\n");
1133 * Return next JobId from comma separated list
1136 * 1 if next JobId returned
1137 * 0 if no more JobIds are in list
1138 * -1 there is an error
/*
 * Extract the next JobId from a comma separated list.
 *   p     - address of the cursor into the list text
 *   JobId - receives the converted value
 * At most maxlen (30) characters are collected into a local buffer, and a
 * comma ends the current item.  An empty item is handled by its own branch,
 * text that is not a number returns -1, and otherwise the text is converted
 * with str_to_int64() and stored in *JobId.
 */
1140 int get_next_jobid_from_list(char **p, uint32_t *JobId)
1142 const int maxlen = 30;
1143 char jobid[maxlen+1];
1147 for (int i=0; i<maxlen; i++) {
1150 } else if (*q == ',') {
1157 if (jobid[0] == 0) {
1159 } else if (!is_a_number(jobid)) {
1160 return -1; /* error */
1163 *JobId = str_to_int64(jobid);
1168 * Timeout signal comes here
/*
 * Handler installed for TIMEOUT_SIGNAL by new_jcr().
 *   sig - signal number (unused)
 * It does no work; delivering the signal is what interrupts the function
 * the target thread was executing.
 */
1170 extern "C" void timeout_handler(int sig)
1172 return; /* thus interrupting the function */
1175 /* Used to display specific daemon information after a fatal signal
1176 * (like B_DB in the director)
/* Capacity of the table of JCR debug hooks. */
1178 #define MAX_DBG_HOOK 10
/* Registered hooks, called in registration order by dbg_print_jcr(). */
1179 static dbg_jcr_hook_t *dbg_jcr_hooks[MAX_DBG_HOOK];
/* Number of slots of dbg_jcr_hooks currently in use. */
1180 static int dbg_jcr_handler_count;
/*
 * Register a hook to be called for each JCR when the chain is dumped.
 *   hook - function to add
 * The ASSERT rejects a registration when the table is already full.
 */
1182 void dbg_jcr_add_hook(dbg_jcr_hook_t *hook)
1184 ASSERT(dbg_jcr_handler_count < MAX_DBG_HOOK);
1185 dbg_jcr_hooks[dbg_jcr_handler_count++] = hook;
1191 * This function should be used ONLY after a fatal signal. We walk through the
1192 * JCR chain without doing any lock, Bacula should not be running.
1194 void dbg_print_jcr(FILE *fp)
1196 char buf1[128], buf2[128], buf3[128], buf4[128];
1201 fprintf(fp, "Attempt to dump current JCRs. njcrs=%d\n", jcrs->size());
1203 for (JCR *jcr = (JCR *)jcrs->first(); jcr ; jcr = (JCR *)jcrs->next(jcr)) {
1204 fprintf(fp, "threadid=%p JobId=%d JobStatus=%c jcr=%p name=%s\n",
1205 (void *)jcr->my_thread_id, (int)jcr->JobId, jcr->JobStatus, jcr, jcr->Job);
1206 fprintf(fp, "threadid=%p killable=%d JobId=%d JobStatus=%c "
1208 (void *)jcr->my_thread_id, jcr->is_killable(),
1209 (int)jcr->JobId, jcr->JobStatus, jcr, jcr->Job);
1210 fprintf(fp, "\tuse_count=%i\n", jcr->use_count());
1211 fprintf(fp, "\tJobType=%c JobLevel=%c\n",
1212 jcr->getJobType(), jcr->getJobLevel());
1213 bstrftime(buf1, sizeof(buf1), jcr->sched_time);
1214 bstrftime(buf2, sizeof(buf2), jcr->start_time);
1215 bstrftime(buf3, sizeof(buf3), jcr->end_time);
1216 bstrftime(buf4, sizeof(buf4), jcr->wait_time);
1217 fprintf(fp, "\tsched_time=%s start_time=%s\n\tend_time=%s wait_time=%s\n",
1218 buf1, buf2, buf3, buf4);
1219 fprintf(fp, "\tdb=%p db_batch=%p batch_started=%i\n",
1220 jcr->db, jcr->db_batch, jcr->batch_started);
1223 * Call all the jcr debug hooks
1225 for(int i=0; i < dbg_jcr_handler_count; i++) {
1226 dbg_jcr_hook_t *hook = dbg_jcr_hooks[i];