2 Bacula® - The Network Backup Solution
4 Copyright (C) 2000-2014 Free Software Foundation Europe e.V.
6 The main author of Bacula is Kern Sibbald, with contributions from many
7 others, a complete list can be found in the file AUTHORS.
9 You may use this file and others of this release according to the
10 license defined in the LICENSE file, which includes the Affero General
11 Public License, v3.0 ("AGPLv3") and some additional permissions and
12 terms pursuant to its AGPLv3 Section 7.
14 Bacula® is a registered trademark of Kern Sibbald.
17 * Manipulation routines for Job Control Records and
18 * handling of last_jobs_list.
20 * Kern E. Sibbald, December 2000
22 * These routines are thread safe.
24 * The job list routines were re-written in May 2005 to
25 * eliminate the global lock while traversing the list, and
26 * to use the dlist subroutines. The locking is now done
27 * on the list each time the list is modified or traversed.
28 * That is, it is "micro-locked" rather than globally locked.
29 * The result is that there is one lock/unlock for each entry
30 * in the list while traversing it rather than a single lock
31 * at the beginning of a traversal and one at the end. This
32 * incurs slightly more overhead, but effectively eliminates
33 * the possibility of race conditions. In addition, with the
34 * exception of the global locking of the list during the
35 * re-reading of the config file, no recursion is needed.
42 const int dbglvl = 3400;
44 /* External variables we reference */
46 /* External referenced functions */
47 void free_bregexps(alist *bregexps);
49 /* Forward referenced functions */
50 extern "C" void timeout_handler(int sig);
51 static void jcr_timeout_check(watchdog_t *self);
52 #ifdef TRACE_JCR_CHAIN
53 static void b_lock_jcr_chain(const char *filen, int line);
54 static void b_unlock_jcr_chain(const char *filen, int line);
55 #define lock_jcr_chain() b_lock_jcr_chain(__FILE__, __LINE__);
56 #define unlock_jcr_chain() b_unlock_jcr_chain(__FILE__, __LINE__);
58 static void lock_jcr_chain();
59 static void unlock_jcr_chain();
64 dlist *last_jobs = NULL;
65 const int max_last_jobs = 10;
67 static dlist *jcrs = NULL; /* JCR chain */
68 static pthread_mutex_t jcr_lock = PTHREAD_MUTEX_INITIALIZER;
70 static pthread_mutex_t job_start_mutex = PTHREAD_MUTEX_INITIALIZER;
72 static pthread_mutex_t last_jobs_mutex = PTHREAD_MUTEX_INITIALIZER;
74 static pthread_key_t jcr_key; /* Pointer to jcr for each thread */
76 pthread_once_t key_once = PTHREAD_ONCE_INIT;
78 static char Job_status[] = "Status Job=%s JobStatus=%d\n";
91 void init_last_jobs_list()
94 struct s_last_job *job_entry = NULL;
96 last_jobs = New(dlist(job_entry, &job_entry->link));
99 jcrs = New(dlist(jcr, &jcr->link));
103 void term_last_jobs_list()
106 lock_last_jobs_list();
107 while (!last_jobs->empty()) {
108 void *je = last_jobs->first();
109 last_jobs->remove(je);
114 unlock_last_jobs_list();
122 bool read_last_jobs_list(int fd, uint64_t addr)
124 struct s_last_job *je, job;
128 Dmsg1(100, "read_last_jobs seek to %d\n", (int)addr);
129 if (addr == 0 || lseek(fd, (boffset_t)addr, SEEK_SET) < 0) {
132 if (read(fd, &num, sizeof(num)) != sizeof(num)) {
135 Dmsg1(100, "Read num_items=%d\n", num);
136 if (num > 4 * max_last_jobs) { /* sanity check */
139 lock_last_jobs_list();
140 for ( ; num; num--) {
141 if (read(fd, &job, sizeof(job)) != sizeof(job)) {
143 Pmsg1(000, "Read job entry. ERR=%s\n", be.bstrerror());
148 je = (struct s_last_job *)malloc(sizeof(struct s_last_job));
149 memcpy((char *)je, (char *)&job, sizeof(job));
151 init_last_jobs_list();
153 last_jobs->append(je);
154 if (last_jobs->size() > max_last_jobs) {
155 je = (struct s_last_job *)last_jobs->first();
156 last_jobs->remove(je);
161 unlock_last_jobs_list();
165 uint64_t write_last_jobs_list(int fd, uint64_t addr)
167 struct s_last_job *je;
171 Dmsg1(100, "write_last_jobs seek to %d\n", (int)addr);
172 if (lseek(fd, (boffset_t)addr, SEEK_SET) < 0) {
176 lock_last_jobs_list();
177 /* First record is number of entries */
178 num = last_jobs->size();
179 if (write(fd, &num, sizeof(num)) != sizeof(num)) {
181 Pmsg1(000, "Error writing num_items: ERR=%s\n", be.bstrerror());
184 foreach_dlist(je, last_jobs) {
185 if (write(fd, je, sizeof(struct s_last_job)) != sizeof(struct s_last_job)) {
187 Pmsg1(000, "Error writing job: ERR=%s\n", be.bstrerror());
191 unlock_last_jobs_list();
193 /* Return current address */
194 stat = lseek(fd, 0, SEEK_CUR);
201 unlock_last_jobs_list();
205 void lock_last_jobs_list()
210 void unlock_last_jobs_list()
215 /* Get an ASCII representation of the Operation being performed as an English noun */
216 const char *JCR::get_OperationName()
222 return _("Verifying");
224 return _("Restoring");
226 return _("Archiving");
230 return _("Migration");
232 return _("Scanning");
234 return _("Unknown operation");
238 /* Get an ASCII representation of the Action being performed, either as an English verb or adjective */
239 const char *JCR::get_ActionName(bool past)
245 return (past == true) ? _("verified") : _("verify");
247 return (past == true) ? _("restored") : _("restore");
249 return (past == true) ? _("archived") : _("archive");
251 return (past == true) ? _("copied") : _("copy");
253 return (past == true) ? _("migrated") : _("migrate");
255 return (past == true) ? _("scanned") : _("scan");
257 return _("unknown action");
270 if (m_JobLevel == L_VIRTUAL_FULL) {
281 * Push a subroutine address into the job end callback stack
283 void job_end_push(JCR *jcr, void job_end_cb(JCR *jcr,void *), void *ctx)
285 jcr->job_end_push.append((void *)job_end_cb);
286 jcr->job_end_push.append(ctx);
289 /* Pop each job_end subroutine and call it */
290 static void job_end_pop(JCR *jcr)
292 void (*job_end_cb)(JCR *jcr, void *ctx);
294 for (int i=jcr->job_end_push.size()-1; i > 0; ) {
295 ctx = jcr->job_end_push.get(i--);
296 job_end_cb = (void (*)(JCR *,void *))jcr->job_end_push.get(i--);
297 job_end_cb(jcr, ctx);
302 * Create thread key for thread specific data
304 void create_jcr_key()
306 int status = pthread_key_create(&jcr_key, NULL);
309 Jmsg1(NULL, M_ABORT, 0, _("pthread key create failed: ERR=%s\n"),
310 be.bstrerror(status));
315 * Create a Job Control Record and link it into JCR chain
316 * Returns newly allocated JCR
317 * Note, since each daemon has a different JCR, he passes
320 JCR *new_jcr(int size, JCR_free_HANDLER *daemon_free_jcr)
323 MQUEUE_ITEM *item = NULL;
324 struct sigaction sigtimer;
327 Dmsg0(dbglvl, "Enter new_jcr\n");
328 status = pthread_once(&key_once, create_jcr_key);
331 Jmsg1(NULL, M_ABORT, 0, _("pthread_once failed. ERR=%s\n"), be.bstrerror(status));
333 jcr = (JCR *)malloc(size);
334 memset(jcr, 0, size);
335 /* Note for the director, this value is changed in jobq.c */
336 jcr->my_thread_id = pthread_self();
337 jcr->msg_queue = New(dlist(item, &item->link));
338 if ((status = pthread_mutex_init(&jcr->msg_queue_mutex, NULL)) != 0) {
340 Jmsg(NULL, M_ABORT, 0, _("Could not init msg_queue mutex. ERR=%s\n"),
341 be.bstrerror(status));
343 jcr->job_end_push.init(1, false);
344 jcr->sched_time = time(NULL);
345 jcr->initial_sched_time = jcr->sched_time;
346 jcr->daemon_free_jcr = daemon_free_jcr; /* plug daemon free routine */
348 jcr->inc_use_count();
349 jcr->VolumeName = get_pool_memory(PM_FNAME);
350 jcr->VolumeName[0] = 0;
351 jcr->errmsg = get_pool_memory(PM_MESSAGE);
353 jcr->comment = get_pool_memory(PM_FNAME);
355 /* Setup some dummy values */
356 bstrncpy(jcr->Job, "*System*", sizeof(jcr->Job));
358 jcr->setJobType(JT_SYSTEM); /* internal job until defined */
359 jcr->setJobLevel(L_NONE);
360 jcr->setJobStatus(JS_Created); /* ready to run */
361 sigtimer.sa_flags = 0;
362 sigtimer.sa_handler = timeout_handler;
363 sigfillset(&sigtimer.sa_mask);
364 sigaction(TIMEOUT_SIGNAL, &sigtimer, NULL);
367 * Locking jobs is a global lock that is needed
368 * so that the Director can stop new jobs from being
369 * added to the jcr chain while it processes a new
370 * conf file and does the job_end_push().
375 jcrs = New(dlist(jcr, &jcr->link));
386 * Remove a JCR from the chain
387 * NOTE! The chain must be locked prior to calling
390 static void remove_jcr(JCR *jcr)
392 Dmsg0(dbglvl, "Enter remove_jcr\n");
394 Emsg0(M_ABORT, 0, _("NULL jcr.\n"));
397 Dmsg0(dbglvl, "Leave remove_jcr\n");
401 * Free stuff common to all JCRs. N.B. Be careful to include only
402 * generic stuff in the common part of the jcr.
404 static void free_common_jcr(JCR *jcr)
406 /* Uses jcr lock/unlock */
407 remove_jcr_from_tsd(jcr);
408 jcr->set_killable(false);
410 jcr->destroy_mutex();
412 if (jcr->msg_queue) {
413 delete jcr->msg_queue;
414 jcr->msg_queue = NULL;
415 pthread_mutex_destroy(&jcr->msg_queue_mutex);
418 /* do this after closing messages */
419 if (jcr->client_name) {
420 free_pool_memory(jcr->client_name);
421 jcr->client_name = NULL;
425 free_pool_memory(jcr->attr);
429 if (jcr->sd_auth_key) {
430 free(jcr->sd_auth_key);
431 jcr->sd_auth_key = NULL;
433 if (jcr->VolumeName) {
434 free_pool_memory(jcr->VolumeName);
435 jcr->VolumeName = NULL;
438 free_bsock(jcr->dir_bsock);
441 free_pool_memory(jcr->errmsg);
448 if (jcr->RegexWhere) {
449 free(jcr->RegexWhere);
450 jcr->RegexWhere = NULL;
452 if (jcr->where_bregexp) {
453 free_bregexps(jcr->where_bregexp);
454 delete jcr->where_bregexp;
455 jcr->where_bregexp = NULL;
457 if (jcr->cached_path) {
458 free_pool_memory(jcr->cached_path);
459 jcr->cached_path = NULL;
463 free_guid_list(jcr->id_list);
467 free_pool_memory(jcr->comment);
474 * Global routine to free a jcr
477 void b_free_jcr(const char *file, int line, JCR *jcr)
479 struct s_last_job *je;
481 Dmsg3(dbglvl, "Enter free_jcr jid=%u from %s:%d\n", jcr->JobId, file, line);
485 void free_jcr(JCR *jcr)
487 struct s_last_job *je;
489 Dmsg3(dbglvl, "Enter free_jcr jid=%u use_count=%d Job=%s\n",
490 jcr->JobId, jcr->use_count(), jcr->Job);
495 jcr->dec_use_count(); /* decrement use count */
496 if (jcr->use_count() < 0) {
497 Jmsg2(jcr, M_ERROR, 0, _("JCR use_count=%d JobId=%d\n"),
498 jcr->use_count(), jcr->JobId);
500 if (jcr->JobId > 0) {
501 Dmsg3(dbglvl, "Dec free_jcr jid=%u use_count=%d Job=%s\n",
502 jcr->JobId, jcr->use_count(), jcr->Job);
504 if (jcr->use_count() > 0) { /* if in use */
508 if (jcr->JobId > 0) {
509 Dmsg3(dbglvl, "remove jcr jid=%u use_count=%d Job=%s\n",
510 jcr->JobId, jcr->use_count(), jcr->Job);
512 remove_jcr(jcr); /* remove Jcr from chain */
515 dequeue_messages(jcr);
516 close_msg(jcr); /* close messages for this job */
517 job_end_pop(jcr); /* pop and call hooked routines */
519 Dmsg1(dbglvl, "End job=%d\n", jcr->JobId);
521 /* Keep some statistics */
522 switch (jcr->getJobType()) {
529 /* Keep list of last jobs, but not Console where JobId==0 */
530 if (jcr->JobId > 0) {
531 lock_last_jobs_list();
533 je = (struct s_last_job *)malloc(sizeof(struct s_last_job));
534 memset(je, 0, sizeof(struct s_last_job)); /* zero in case unset fields */
535 je->Errors = jcr->JobErrors;
536 je->JobType = jcr->getJobType();
537 je->JobId = jcr->JobId;
538 je->VolSessionId = jcr->VolSessionId;
539 je->VolSessionTime = jcr->VolSessionTime;
540 bstrncpy(je->Job, jcr->Job, sizeof(je->Job));
541 je->JobFiles = jcr->JobFiles;
542 je->JobBytes = jcr->JobBytes;
543 je->JobStatus = jcr->JobStatus;
544 je->JobLevel = jcr->getJobLevel();
545 je->start_time = jcr->start_time;
546 je->end_time = time(NULL);
549 init_last_jobs_list();
551 last_jobs->append(je);
552 if (last_jobs->size() > max_last_jobs) {
553 je = (struct s_last_job *)last_jobs->first();
554 last_jobs->remove(je);
557 unlock_last_jobs_list();
564 if (jcr->daemon_free_jcr) {
565 jcr->daemon_free_jcr(jcr); /* call daemon free routine */
568 free_common_jcr(jcr);
569 Dmsg0(dbglvl, "Exit free_jcr\n");
573 * Remove jcr from thread specific data, but
574 * make sure it is us who are attached.
576 void remove_jcr_from_tsd(JCR *jcr)
578 JCR *tjcr = get_jcr_from_tsd();
580 set_jcr_in_tsd(INVALID_JCR);
584 void JCR::set_killable(bool killable)
588 jcr->my_thread_killable = killable;
593 * Put this jcr in the thread specific data
594 * if update_thread_info is true and the jcr is valid,
595 * we update the my_thread_id in the JCR
597 void set_jcr_in_tsd(JCR *jcr)
599 int status = pthread_setspecific(jcr_key, (void *)jcr);
602 Jmsg1(jcr, M_ABORT, 0, _("pthread_setspecific failed: ERR=%s\n"),
603 be.bstrerror(status));
607 void JCR::my_thread_send_signal(int sig)
610 if (this->is_killable() &&
611 !pthread_equal(this->my_thread_id, pthread_self()))
613 Dmsg1(800, "Send kill to jid=%d\n", this->JobId);
614 pthread_kill(this->my_thread_id, sig);
616 } else if (!this->is_killable()) {
617 Dmsg1(10, "Warning, can't send kill to jid=%d\n", this->JobId);
623 * Give me the jcr that is attached to this thread
625 JCR *get_jcr_from_tsd()
627 JCR *jcr = (JCR *)pthread_getspecific(jcr_key);
628 // printf("get_jcr_from_tsd: jcr=%p\n", jcr);
629 /* set any INVALID_JCR to NULL which the rest of Bacula understands */
630 if (jcr == INVALID_JCR) {
638 * Find which JobId corresponds to the current thread
640 uint32_t get_jobid_from_tsd()
644 jcr = get_jcr_from_tsd();
645 // printf("get_jobid_from_tsr: jcr=%p\n", jcr);
647 JobId = (uint32_t)jcr->JobId;
653 * Given a JobId, find the JCR
654 * Returns: jcr on success
657 JCR *get_jcr_by_id(uint32_t JobId)
662 if (jcr->JobId == JobId) {
663 jcr->inc_use_count();
664 Dmsg3(dbglvl, "Inc get_jcr jid=%u use_count=%d Job=%s\n",
665 jcr->JobId, jcr->use_count(), jcr->Job);
674 * Given a thread id, find the JobId
675 * Returns: JobId on success
678 uint32_t get_jobid_from_tid(pthread_t tid)
684 if (pthread_equal(jcr->my_thread_id, tid)) {
698 * Given a SessionId and SessionTime, find the JCR
699 * Returns: jcr on success
702 JCR *get_jcr_by_session(uint32_t SessionId, uint32_t SessionTime)
707 if (jcr->VolSessionId == SessionId &&
708 jcr->VolSessionTime == SessionTime) {
709 jcr->inc_use_count();
710 Dmsg3(dbglvl, "Inc get_jcr jid=%u use_count=%d Job=%s\n",
711 jcr->JobId, jcr->use_count(), jcr->Job);
721 * Given a Job, find the JCR
722 * compares on the number of characters in Job
723 * thus allowing partial matches.
724 * Returns: jcr on success
727 JCR *get_jcr_by_partial_name(char *Job)
737 if (strncmp(Job, jcr->Job, len) == 0) {
738 jcr->inc_use_count();
739 Dmsg3(dbglvl, "Inc get_jcr jid=%u use_count=%d Job=%s\n",
740 jcr->JobId, jcr->use_count(), jcr->Job);
750 * Given a Job, find the JCR
751 * requires an exact match of names.
752 * Returns: jcr on success
755 JCR *get_jcr_by_full_name(char *Job)
763 if (strcmp(jcr->Job, Job) == 0) {
764 jcr->inc_use_count();
765 Dmsg3(dbglvl, "Inc get_jcr jid=%u use_count=%d Job=%s\n",
766 jcr->JobId, jcr->use_count(), jcr->Job);
774 static void update_wait_time(JCR *jcr, int newJobStatus)
776 bool enter_in_waittime;
777 int oldJobStatus = jcr->JobStatus;
779 switch (newJobStatus) {
784 case JS_WaitStoreRes:
786 case JS_WaitClientRes:
788 case JS_WaitPriority:
789 enter_in_waittime = true;
792 enter_in_waittime = false; /* not a Wait situation */
797 * If we were previously waiting and are not any more
798 * we want to update the wait_time variable, which is
799 * the start of waiting.
801 switch (oldJobStatus) {
806 case JS_WaitStoreRes:
808 case JS_WaitClientRes:
810 case JS_WaitPriority:
811 if (!enter_in_waittime) { /* we get out the wait time */
812 jcr->wait_time_sum += (time(NULL) - jcr->wait_time);
817 /* if wait state is new, we keep current time for watchdog MaxWaitTime */
819 if (enter_in_waittime) {
820 jcr->wait_time = time(NULL);
827 * Priority runs from 0 (lowest) to 10 (highest)
829 static int get_status_priority(int JobStatus)
833 case JS_ErrorTerminated:
849 * Send Job status to Director
851 bool JCR::sendJobStatus()
854 if (jcr->dir_bsock) {
855 return jcr->dir_bsock->fsend(Job_status, jcr->Job, jcr->JobStatus);
861 * Set and send Job status to Director
863 bool JCR::sendJobStatus(int newJobStatus)
866 if (!jcr->is_JobStatus(newJobStatus)) {
867 setJobStatus(newJobStatus);
868 if (jcr->dir_bsock) {
869 return jcr->dir_bsock->fsend(Job_status, jcr->Job, jcr->JobStatus);
875 void JCR::setJobStarted()
878 jcr->job_started = true;
879 jcr->job_started_time = time(NULL);
882 void JCR::setJobStatus(int newJobStatus)
885 int priority, old_priority;
886 int oldJobStatus = jcr->JobStatus;
887 priority = get_status_priority(newJobStatus);
888 old_priority = get_status_priority(oldJobStatus);
890 Dmsg2(800, "set_jcr_job_status(%s, %c)\n", Job, newJobStatus);
892 /* Update wait_time depending on newJobStatus and oldJobStatus */
893 update_wait_time(jcr, newJobStatus);
896 * For a set of errors, ... keep the current status
897 * so it isn't lost. For all others, set it.
899 Dmsg2(800, "OnEntry JobStatus=%c newJobstatus=%c\n", oldJobStatus, newJobStatus);
901 * If the proposed new status has a higher priority than the current status, change it.
902 * If status priority == new priority and both are zero, take
904 * If it is not zero, then we keep the first non-zero "error" that
907 if (priority > old_priority || (
908 priority == 0 && old_priority == 0)) {
909 Dmsg4(800, "Set new stat. old: %c,%d new: %c,%d\n",
910 jcr->JobStatus, old_priority, newJobStatus, priority);
911 jcr->JobStatus = newJobStatus; /* replace with new status */
914 if (oldJobStatus != jcr->JobStatus) {
915 Dmsg2(800, "leave setJobStatus old=%c new=%c\n", oldJobStatus, newJobStatus);
916 // generate_plugin_event(jcr, bEventStatusChange, NULL);
920 #ifdef TRACE_JCR_CHAIN
921 static int lock_count = 0;
927 #ifdef TRACE_JCR_CHAIN
928 static void b_lock_jcr_chain(const char *fname, int line)
930 static void lock_jcr_chain()
933 #ifdef TRACE_JCR_CHAIN
934 Dmsg3(dbglvl, "Lock jcr chain %d from %s:%d\n", ++lock_count, fname, line);
942 #ifdef TRACE_JCR_CHAIN
943 static void b_unlock_jcr_chain(const char *fname, int line)
945 static void unlock_jcr_chain()
948 #ifdef TRACE_JCR_CHAIN
949 Dmsg3(dbglvl, "Unlock jcr chain %d from %s:%d\n", lock_count--, fname, line);
955 * Start walk of jcr chain
956 * The proper way to walk the jcr chain is:
963 * It is possible to leave out the endeach_jcr(jcr), but
964 * in that case, the last jcr referenced must be explicitly
970 JCR *jcr_walk_start()
974 jcr = (JCR *)jcrs->first();
976 jcr->inc_use_count();
977 if (jcr->JobId > 0) {
978 Dmsg3(dbglvl, "Inc walk_start jid=%u use_count=%d Job=%s\n",
979 jcr->JobId, jcr->use_count(), jcr->Job);
987 * Get next jcr from chain, and release current one
989 JCR *jcr_walk_next(JCR *prev_jcr)
994 jcr = (JCR *)jcrs->next(prev_jcr);
996 jcr->inc_use_count();
997 if (jcr->JobId > 0) {
998 Dmsg3(dbglvl, "Inc walk_next jid=%u use_count=%d Job=%s\n",
999 jcr->JobId, jcr->use_count(), jcr->Job);
1010 * Release last jcr referenced
1012 void jcr_walk_end(JCR *jcr)
1015 if (jcr->JobId > 0) {
1016 Dmsg3(dbglvl, "Free walk_end jid=%u use_count=%d Job=%s\n",
1017 jcr->JobId, jcr->use_count(), jcr->Job);
1024 * Return number of Jobs
1032 for (jcr = (JCR *)jcrs->first(); (jcr = (JCR *)jcrs->next(jcr)); ) {
1033 if (jcr->JobId > 0) {
1043 * Setup to call the timeout check routine every 30 seconds
1044 * This routine will check any timers that have been enabled.
1046 bool init_jcr_subsystem(void)
1048 watchdog_t *wd = new_watchdog();
1050 wd->one_shot = false;
1051 wd->interval = 30; /* FIXME: should be configurable somewhere, even
1052 if only with a #define */
1053 wd->callback = jcr_timeout_check;
1055 register_watchdog(wd);
1060 static void jcr_timeout_check(watchdog_t *self)
1066 Dmsg0(dbglvl, "Start JCR timeout checks\n");
1068 /* Walk through all JCRs checking if any one is
1069 * blocked for more than specified max time.
1072 Dmsg2(dbglvl, "jcr_timeout_check JobId=%u jcr=0x%x\n", jcr->JobId, jcr);
1073 if (jcr->JobId == 0) {
1076 bs = jcr->store_bsock;
1078 timer_start = bs->timer_start;
1079 if (timer_start && (watchdog_time - timer_start) > bs->timeout) {
1080 bs->timer_start = 0; /* turn off timer */
1081 bs->set_timed_out();
1082 Qmsg(jcr, M_ERROR, 0, _(
1083 "Watchdog sending kill after %d secs to thread stalled reading Storage daemon.\n"),
1084 (int)(watchdog_time - timer_start));
1085 jcr->my_thread_send_signal(TIMEOUT_SIGNAL);
1088 bs = jcr->file_bsock;
1090 timer_start = bs->timer_start;
1091 if (timer_start && (watchdog_time - timer_start) > bs->timeout) {
1092 bs->timer_start = 0; /* turn off timer */
1093 bs->set_timed_out();
1094 Qmsg(jcr, M_ERROR, 0, _(
1095 "Watchdog sending kill after %d secs to thread stalled reading File daemon.\n"),
1096 (int)(watchdog_time - timer_start));
1097 jcr->my_thread_send_signal(TIMEOUT_SIGNAL);
1100 bs = jcr->dir_bsock;
1102 timer_start = bs->timer_start;
1103 if (timer_start && (watchdog_time - timer_start) > bs->timeout) {
1104 bs->timer_start = 0; /* turn off timer */
1105 bs->set_timed_out();
1106 Qmsg(jcr, M_ERROR, 0, _(
1107 "Watchdog sending kill after %d secs to thread stalled reading Director.\n"),
1108 (int)(watchdog_time - timer_start));
1109 jcr->my_thread_send_signal(TIMEOUT_SIGNAL);
1115 Dmsg0(dbglvl, "Finished JCR timeout checks\n");
1119 * Return next JobId from comma separated list
1122 * 1 if next JobId returned
1123 * 0 if no more JobIds are in list
1124 * -1 there is an error
1126 int get_next_jobid_from_list(char **p, uint32_t *JobId)
1128 const int maxlen = 30;
1129 char jobid[maxlen+1];
1133 for (int i=0; i<maxlen; i++) {
1136 } else if (*q == ',') {
1143 if (jobid[0] == 0) {
1145 } else if (!is_a_number(jobid)) {
1146 return -1; /* error */
1149 *JobId = str_to_int64(jobid);
1154 * Timeout signal comes here
1156 extern "C" void timeout_handler(int sig)
1158 return; /* thus interrupting the function */
1161 /* Used to display specific daemon information after a fatal signal
1162 * (like B_DB in the director)
1164 #define MAX_DBG_HOOK 10
1165 static dbg_jcr_hook_t *dbg_jcr_hooks[MAX_DBG_HOOK];
1166 static int dbg_jcr_handler_count;
1168 void dbg_jcr_add_hook(dbg_jcr_hook_t *hook)
1170 ASSERT(dbg_jcr_handler_count < MAX_DBG_HOOK);
1171 dbg_jcr_hooks[dbg_jcr_handler_count++] = hook;
1174 /* on win32, the pthread_t is a struct, so we don't display it */
1175 #ifdef HAVE_MINGW_W64
1176 # define get_threadid(a) (void *)0
1178 # define get_threadid(a) (void *)(a)
1183 * This function should be used ONLY after a fatal signal. We walk through the
1184 * JCR chain without doing any lock, Bacula should not be running.
1186 void dbg_print_jcr(FILE *fp)
1188 char buf1[128], buf2[128], buf3[128], buf4[128];
1193 fprintf(fp, "Attempt to dump current JCRs. njcrs=%d\n", jcrs->size());
1195 for (JCR *jcr = (JCR *)jcrs->first(); jcr ; jcr = (JCR *)jcrs->next(jcr)) {
1196 fprintf(fp, "threadid=%p JobId=%d JobStatus=%c jcr=%p name=%s\n",
1197 get_threadid(jcr->my_thread_id), (int)jcr->JobId, jcr->JobStatus, jcr, jcr->Job);
1198 fprintf(fp, "threadid=%p killable=%d JobId=%d JobStatus=%c "
1200 get_threadid(jcr->my_thread_id), jcr->is_killable(),
1201 (int)jcr->JobId, jcr->JobStatus, jcr, jcr->Job);
1202 fprintf(fp, "\tuse_count=%i\n", jcr->use_count());
1203 fprintf(fp, "\tJobType=%c JobLevel=%c\n",
1204 jcr->getJobType(), jcr->getJobLevel());
1205 bstrftime(buf1, sizeof(buf1), jcr->sched_time);
1206 bstrftime(buf2, sizeof(buf2), jcr->start_time);
1207 bstrftime(buf3, sizeof(buf3), jcr->end_time);
1208 bstrftime(buf4, sizeof(buf4), jcr->wait_time);
1209 fprintf(fp, "\tsched_time=%s start_time=%s\n\tend_time=%s wait_time=%s\n",
1210 buf1, buf2, buf3, buf4);
1211 fprintf(fp, "\tdb=%p db_batch=%p batch_started=%i\n",
1212 jcr->db, jcr->db_batch, jcr->batch_started);
1215 * Call all the jcr debug hooks
1217 for(int i=0; i < dbg_jcr_handler_count; i++) {
1218 dbg_jcr_hook_t *hook = dbg_jcr_hooks[i];