2 Bacula(R) - The Network Backup Solution
4 Copyright (C) 2000-2015 Kern Sibbald
5 Copyright (C) 2000-2014 Free Software Foundation Europe e.V.
7 The original author of Bacula is Kern Sibbald, with contributions
8 from many others, a complete list can be found in the file AUTHORS.
10 You may use this file and others of this release according to the
11 license defined in the LICENSE file, which includes the Affero General
12 Public License, v3.0 ("AGPLv3") and some additional permissions and
13 terms pursuant to its AGPLv3 Section 7.
15 This notice must be preserved when any source code is
16 conveyed and/or propagated.
18 Bacula(R) is a registered trademark of Kern Sibbald.
21 * Manipulation routines for Job Control Records and
22 * handling of last_jobs_list.
24 * Kern E. Sibbald, December 2000
26 * These routines are thread safe.
28 * The job list routines were re-written in May 2005 to
29 * eliminate the global lock while traversing the list, and
30 * to use the dlist subroutines. The locking is now done
31 * on the list each time the list is modified or traversed.
32 * That is, it is "micro-locked" rather than globally locked.
33 * The result is that there is one lock/unlock for each entry
34 * in the list while traversing it rather than a single lock
35 * at the beginning of a traversal and one at the end. This
36 * incurs slightly more overhead, but effectively eliminates
37 * the possibility of race conditions. In addition, with the
38 * exception of the global locking of the list during the
39 * re-reading of the config file, no recursion is needed.
46 const int dbglvl = 3400;
48 /* External variables we reference */
50 /* External referenced functions */
51 void free_bregexps(alist *bregexps);
53 /* Forward referenced functions */
54 extern "C" void timeout_handler(int sig);
55 static void jcr_timeout_check(watchdog_t *self);
56 #ifdef TRACE_JCR_CHAIN
57 static void b_lock_jcr_chain(const char *filen, int line);
58 static void b_unlock_jcr_chain(const char *filen, int line);
59 #define lock_jcr_chain() b_lock_jcr_chain(__FILE__, __LINE__);
60 #define unlock_jcr_chain() b_unlock_jcr_chain(__FILE__, __LINE__);
62 static void lock_jcr_chain();
63 static void unlock_jcr_chain();
68 dlist *last_jobs = NULL;
69 const int max_last_jobs = 10;
71 static dlist *jcrs = NULL; /* JCR chain */
72 static pthread_mutex_t jcr_lock = PTHREAD_MUTEX_INITIALIZER;
74 static pthread_mutex_t job_start_mutex = PTHREAD_MUTEX_INITIALIZER;
76 static pthread_mutex_t last_jobs_mutex = PTHREAD_MUTEX_INITIALIZER;
78 static pthread_key_t jcr_key; /* Pointer to jcr for each thread */
80 pthread_once_t key_once = PTHREAD_ONCE_INIT;
82 static char Job_status[] = "Status Job=%s JobStatus=%d\n";
95 void init_last_jobs_list()
98 struct s_last_job *job_entry = NULL;
100 last_jobs = New(dlist(job_entry, &job_entry->link));
103 jcrs = New(dlist(jcr, &jcr->link));
107 void term_last_jobs_list()
110 lock_last_jobs_list();
111 while (!last_jobs->empty()) {
112 void *je = last_jobs->first();
113 last_jobs->remove(je);
118 unlock_last_jobs_list();
126 bool read_last_jobs_list(int fd, uint64_t addr)
128 struct s_last_job *je, job;
132 Dmsg1(100, "read_last_jobs seek to %d\n", (int)addr);
133 if (addr == 0 || lseek(fd, (boffset_t)addr, SEEK_SET) < 0) {
136 if (read(fd, &num, sizeof(num)) != sizeof(num)) {
139 Dmsg1(100, "Read num_items=%d\n", num);
140 if (num > 4 * max_last_jobs) { /* sanity check */
143 lock_last_jobs_list();
144 for ( ; num; num--) {
145 if (read(fd, &job, sizeof(job)) != sizeof(job)) {
147 Pmsg1(000, "Read job entry. ERR=%s\n", be.bstrerror());
152 je = (struct s_last_job *)malloc(sizeof(struct s_last_job));
153 memcpy((char *)je, (char *)&job, sizeof(job));
155 init_last_jobs_list();
157 last_jobs->append(je);
158 if (last_jobs->size() > max_last_jobs) {
159 je = (struct s_last_job *)last_jobs->first();
160 last_jobs->remove(je);
165 unlock_last_jobs_list();
169 uint64_t write_last_jobs_list(int fd, uint64_t addr)
171 struct s_last_job *je;
175 Dmsg1(100, "write_last_jobs seek to %d\n", (int)addr);
176 if (lseek(fd, (boffset_t)addr, SEEK_SET) < 0) {
180 lock_last_jobs_list();
181 /* First record is number of entries */
182 num = last_jobs->size();
183 if (write(fd, &num, sizeof(num)) != sizeof(num)) {
185 Pmsg1(000, "Error writing num_items: ERR=%s\n", be.bstrerror());
188 foreach_dlist(je, last_jobs) {
189 if (write(fd, je, sizeof(struct s_last_job)) != sizeof(struct s_last_job)) {
191 Pmsg1(000, "Error writing job: ERR=%s\n", be.bstrerror());
195 unlock_last_jobs_list();
197 /* Return current address */
198 stat = lseek(fd, 0, SEEK_CUR);
205 unlock_last_jobs_list();
209 void lock_last_jobs_list()
214 void unlock_last_jobs_list()
219 /* Get an ASCII representation of the Operation being performed as an English noun */
220 const char *JCR::get_OperationName()
226 return _("Verifying");
228 return _("Restoring");
230 return _("Archiving");
234 return _("Migration");
236 return _("Scanning");
238 return _("Unknown operation");
242 /* Get an ASCII representation of the Action being performed, either an English verb or adjective */
243 const char *JCR::get_ActionName(bool past)
249 return (past == true) ? _("verified") : _("verify");
251 return (past == true) ? _("restored") : _("restore");
253 return (past == true) ? _("archived") : _("archive");
255 return (past == true) ? _("copied") : _("copy");
257 return (past == true) ? _("migrated") : _("migrate");
259 return (past == true) ? _("scanned") : _("scan");
261 return _("unknown action");
274 if (m_JobLevel == L_VIRTUAL_FULL) {
285 * Push a subroutine address into the job end callback stack
287 void job_end_push(JCR *jcr, void job_end_cb(JCR *jcr,void *), void *ctx)
289 jcr->job_end_push.append((void *)job_end_cb);
290 jcr->job_end_push.append(ctx);
293 /* Pop each job_end subroutine and call it */
294 static void job_end_pop(JCR *jcr)
296 void (*job_end_cb)(JCR *jcr, void *ctx);
298 for (int i=jcr->job_end_push.size()-1; i > 0; ) {
299 ctx = jcr->job_end_push.get(i--);
300 job_end_cb = (void (*)(JCR *,void *))jcr->job_end_push.get(i--);
301 job_end_cb(jcr, ctx);
306 * Create thread key for thread specific data
308 void create_jcr_key()
310 int status = pthread_key_create(&jcr_key, NULL);
313 Jmsg1(NULL, M_ABORT, 0, _("pthread key create failed: ERR=%s\n"),
314 be.bstrerror(status));
319 * Create a Job Control Record and link it into JCR chain
320 * Returns newly allocated JCR
321 * Note, since each daemon has a different JCR, he passes
324 JCR *new_jcr(int size, JCR_free_HANDLER *daemon_free_jcr)
327 MQUEUE_ITEM *item = NULL;
330 Dmsg0(dbglvl, "Enter new_jcr\n");
331 status = pthread_once(&key_once, create_jcr_key);
334 Jmsg1(NULL, M_ABORT, 0, _("pthread_once failed. ERR=%s\n"), be.bstrerror(status));
336 jcr = (JCR *)malloc(size);
337 memset(jcr, 0, size);
338 /* Note for the director, this value is changed in jobq.c */
339 jcr->my_thread_id = pthread_self();
340 jcr->msg_queue = New(dlist(item, &item->link));
341 if ((status = pthread_mutex_init(&jcr->msg_queue_mutex, NULL)) != 0) {
343 Jmsg(NULL, M_ABORT, 0, _("Could not init msg_queue mutex. ERR=%s\n"),
344 be.bstrerror(status));
346 jcr->job_end_push.init(1, false);
347 jcr->sched_time = time(NULL);
348 jcr->initial_sched_time = jcr->sched_time;
349 jcr->daemon_free_jcr = daemon_free_jcr; /* plug daemon free routine */
351 jcr->inc_use_count();
352 jcr->VolumeName = get_pool_memory(PM_FNAME);
353 jcr->VolumeName[0] = 0;
354 jcr->errmsg = get_pool_memory(PM_MESSAGE);
356 jcr->comment = get_pool_memory(PM_FNAME);
358 /* Setup some dummy values */
359 bstrncpy(jcr->Job, "*System*", sizeof(jcr->Job));
361 jcr->setJobType(JT_SYSTEM); /* internal job until defined */
362 jcr->setJobLevel(L_NONE);
363 jcr->setJobStatus(JS_Created); /* ready to run */
365 struct sigaction sigtimer;
366 sigtimer.sa_flags = 0;
367 sigtimer.sa_handler = timeout_handler;
368 sigfillset(&sigtimer.sa_mask);
369 sigaction(TIMEOUT_SIGNAL, &sigtimer, NULL);
373 * Locking jobs is a global lock that is needed
374 * so that the Director can stop new jobs from being
375 * added to the jcr chain while it processes a new
376 * conf file and does the job_end_push().
381 jcrs = New(dlist(jcr, &jcr->link));
392 * Remove a JCR from the chain
393 * NOTE! The chain must be locked prior to calling
396 static void remove_jcr(JCR *jcr)
398 Dmsg0(dbglvl, "Enter remove_jcr\n");
400 Emsg0(M_ABORT, 0, _("NULL jcr.\n"));
403 Dmsg0(dbglvl, "Leave remove_jcr\n");
407 * Free stuff common to all JCRs. N.B. Be careful to include only
408 * generic stuff in the common part of the jcr.
410 static void free_common_jcr(JCR *jcr)
412 /* Uses jcr lock/unlock */
413 remove_jcr_from_tsd(jcr);
414 jcr->set_killable(false);
416 jcr->destroy_mutex();
418 if (jcr->msg_queue) {
419 delete jcr->msg_queue;
420 jcr->msg_queue = NULL;
421 pthread_mutex_destroy(&jcr->msg_queue_mutex);
424 /* do this after closing messages */
425 if (jcr->client_name) {
426 free_pool_memory(jcr->client_name);
427 jcr->client_name = NULL;
431 free_pool_memory(jcr->attr);
435 if (jcr->sd_auth_key) {
436 free(jcr->sd_auth_key);
437 jcr->sd_auth_key = NULL;
439 if (jcr->VolumeName) {
440 free_pool_memory(jcr->VolumeName);
441 jcr->VolumeName = NULL;
444 free_bsock(jcr->dir_bsock);
447 free_pool_memory(jcr->errmsg);
454 if (jcr->RegexWhere) {
455 free(jcr->RegexWhere);
456 jcr->RegexWhere = NULL;
458 if (jcr->where_bregexp) {
459 free_bregexps(jcr->where_bregexp);
460 delete jcr->where_bregexp;
461 jcr->where_bregexp = NULL;
463 if (jcr->cached_path) {
464 free_pool_memory(jcr->cached_path);
465 jcr->cached_path = NULL;
469 free_guid_list(jcr->id_list);
473 free_pool_memory(jcr->comment);
480 * Global routine to free a jcr
483 void b_free_jcr(const char *file, int line, JCR *jcr)
485 struct s_last_job *je;
487 Dmsg3(dbglvl, "Enter free_jcr jid=%u from %s:%d\n", jcr->JobId, file, line);
491 void free_jcr(JCR *jcr)
493 struct s_last_job *je;
495 Dmsg3(dbglvl, "Enter free_jcr jid=%u use_count=%d Job=%s\n",
496 jcr->JobId, jcr->use_count(), jcr->Job);
501 jcr->dec_use_count(); /* decrement use count */
502 ASSERT2(jcr->use_count() >= 0, "JCR use_count < 0");
503 // Jmsg2(jcr, M_ERROR, 0, _("JCR use_count=%d JobId=%d\n"),
504 // jcr->use_count(), jcr->JobId);
506 if (jcr->JobId > 0) {
507 Dmsg3(dbglvl, "Dec free_jcr jid=%u use_count=%d Job=%s\n",
508 jcr->JobId, jcr->use_count(), jcr->Job);
510 if (jcr->use_count() > 0) { /* if in use */
514 if (jcr->JobId > 0) {
515 Dmsg3(dbglvl, "remove jcr jid=%u use_count=%d Job=%s\n",
516 jcr->JobId, jcr->use_count(), jcr->Job);
519 remove_jcr(jcr); /* remove Jcr from chain */
522 dequeue_messages(jcr);
523 close_msg(jcr); /* close messages for this job */
524 job_end_pop(jcr); /* pop and call hooked routines */
526 Dmsg1(dbglvl, "End job=%d\n", jcr->JobId);
528 /* Keep some statistics */
529 switch (jcr->getJobType()) {
536 /* Keep list of last jobs, but not Console where JobId==0 */
537 if (jcr->JobId > 0) {
538 lock_last_jobs_list();
540 je = (struct s_last_job *)malloc(sizeof(struct s_last_job));
541 memset(je, 0, sizeof(struct s_last_job)); /* zero in case unset fields */
542 je->Errors = jcr->JobErrors;
543 je->JobType = jcr->getJobType();
544 je->JobId = jcr->JobId;
545 je->VolSessionId = jcr->VolSessionId;
546 je->VolSessionTime = jcr->VolSessionTime;
547 bstrncpy(je->Job, jcr->Job, sizeof(je->Job));
548 je->JobFiles = jcr->JobFiles;
549 je->JobBytes = jcr->JobBytes;
550 je->JobStatus = jcr->JobStatus;
551 je->JobLevel = jcr->getJobLevel();
552 je->start_time = jcr->start_time;
553 je->end_time = time(NULL);
556 init_last_jobs_list();
558 last_jobs->append(je);
559 if (last_jobs->size() > max_last_jobs) {
560 je = (struct s_last_job *)last_jobs->first();
561 last_jobs->remove(je);
564 unlock_last_jobs_list();
571 if (jcr->daemon_free_jcr) {
572 jcr->daemon_free_jcr(jcr); /* call daemon free routine */
575 free_common_jcr(jcr);
576 close_msg(NULL); /* flush any daemon messages */
577 Dmsg0(dbglvl, "Exit free_jcr\n");
581 * Remove jcr from thread specific data, but
582 * make sure it is us who are attached.
584 void remove_jcr_from_tsd(JCR *jcr)
586 JCR *tjcr = get_jcr_from_tsd();
588 set_jcr_in_tsd(INVALID_JCR);
592 void JCR::set_killable(bool killable)
595 my_thread_killable = killable;
600 * Put this jcr in the thread specific data
601 * if update_thread_info is true and the jcr is valid,
602 * we update the my_thread_id in the JCR
604 void set_jcr_in_tsd(JCR *jcr)
606 int status = pthread_setspecific(jcr_key, (void *)jcr);
609 Jmsg1(jcr, M_ABORT, 0, _("pthread_setspecific failed: ERR=%s\n"),
610 be.bstrerror(status));
614 void JCR::my_thread_send_signal(int sig)
616 lock_jcr_chain(); /* use global lock */
621 if (this->is_killable() &&
622 !pthread_equal(this->my_thread_id, pthread_self()))
624 Dmsg1(800, "Send kill to jid=%d\n", this->JobId);
625 pthread_kill(this->my_thread_id, sig);
626 this->exiting = true;
628 } else if (!this->is_killable()) {
629 Dmsg1(10, "Warning, can't send kill to jid=%d\n", this->JobId);
637 * Give me the jcr that is attached to this thread
639 JCR *get_jcr_from_tsd()
641 JCR *jcr = (JCR *)pthread_getspecific(jcr_key);
642 // printf("get_jcr_from_tsd: jcr=%p\n", jcr);
643 /* set any INVALID_JCR to NULL which the rest of Bacula understands */
644 if (jcr == INVALID_JCR) {
652 * Find which JobId corresponds to the current thread
654 uint32_t get_jobid_from_tsd()
658 jcr = get_jcr_from_tsd();
659 // printf("get_jobid_from_tsr: jcr=%p\n", jcr);
661 JobId = (uint32_t)jcr->JobId;
667 * Given a JobId, find the JCR
668 * Returns: jcr on success
671 JCR *get_jcr_by_id(uint32_t JobId)
676 if (jcr->JobId == JobId) {
677 jcr->inc_use_count();
678 Dmsg3(dbglvl, "Inc get_jcr jid=%u use_count=%d Job=%s\n",
679 jcr->JobId, jcr->use_count(), jcr->Job);
688 * Given a thread id, find the JobId
689 * Returns: JobId on success
692 uint32_t get_jobid_from_tid(pthread_t tid)
698 if (pthread_equal(jcr->my_thread_id, tid)) {
712 * Given a SessionId and SessionTime, find the JCR
713 * Returns: jcr on success
716 JCR *get_jcr_by_session(uint32_t SessionId, uint32_t SessionTime)
721 if (jcr->VolSessionId == SessionId &&
722 jcr->VolSessionTime == SessionTime) {
723 jcr->inc_use_count();
724 Dmsg3(dbglvl, "Inc get_jcr jid=%u use_count=%d Job=%s\n",
725 jcr->JobId, jcr->use_count(), jcr->Job);
735 * Given a Job, find the JCR
736 * compares on the number of characters in Job
737 * thus allowing partial matches.
738 * Returns: jcr on success
741 JCR *get_jcr_by_partial_name(char *Job)
751 if (strncmp(Job, jcr->Job, len) == 0) {
752 jcr->inc_use_count();
753 Dmsg3(dbglvl, "Inc get_jcr jid=%u use_count=%d Job=%s\n",
754 jcr->JobId, jcr->use_count(), jcr->Job);
764 * Given a Job, find the JCR
765 * requires an exact match of names.
766 * Returns: jcr on success
769 JCR *get_jcr_by_full_name(char *Job)
777 if (strcmp(jcr->Job, Job) == 0) {
778 jcr->inc_use_count();
779 Dmsg3(dbglvl, "Inc get_jcr jid=%u use_count=%d Job=%s\n",
780 jcr->JobId, jcr->use_count(), jcr->Job);
788 static void update_wait_time(JCR *jcr, int newJobStatus)
790 bool enter_in_waittime;
791 int oldJobStatus = jcr->JobStatus;
793 switch (newJobStatus) {
798 case JS_WaitStoreRes:
800 case JS_WaitClientRes:
802 case JS_WaitPriority:
803 enter_in_waittime = true;
806 enter_in_waittime = false; /* not a Wait situation */
811 * If we were previously waiting and are not any more
812 * we want to update the wait_time variable, which is
813 * the start of waiting.
815 switch (oldJobStatus) {
820 case JS_WaitStoreRes:
822 case JS_WaitClientRes:
824 case JS_WaitPriority:
825 if (!enter_in_waittime) { /* we get out the wait time */
826 jcr->wait_time_sum += (time(NULL) - jcr->wait_time);
831 /* if wait state is new, we keep current time for watchdog MaxWaitTime */
833 if (enter_in_waittime) {
834 jcr->wait_time = time(NULL);
841 * Priority runs from 0 (lowest) to 10 (highest)
843 static int get_status_priority(int JobStatus)
850 case JS_ErrorTerminated:
866 * Send Job status to Director
868 bool JCR::sendJobStatus()
871 return dir_bsock->fsend(Job_status, Job, JobStatus);
877 * Set and send Job status to Director
879 bool JCR::sendJobStatus(int aJobStatus)
881 if (!is_JobStatus(aJobStatus)) {
882 setJobStatus(aJobStatus);
884 return dir_bsock->fsend(Job_status, Job, JobStatus);
890 void JCR::setJobStarted()
893 job_started_time = time(NULL);
896 void JCR::setJobStatus(int newJobStatus)
898 int priority, old_priority;
899 int oldJobStatus = JobStatus;
900 priority = get_status_priority(newJobStatus);
901 old_priority = get_status_priority(oldJobStatus);
903 Dmsg2(800, "set_jcr_job_status(%s, %c)\n", Job, newJobStatus);
905 /* Update wait_time depending on newJobStatus and oldJobStatus */
906 update_wait_time(this, newJobStatus);
909 * For a set of errors, ... keep the current status
910 * so it isn't lost. For all others, set it.
912 Dmsg2(800, "OnEntry JobStatus=%c newJobstatus=%c\n", oldJobStatus, newJobStatus);
914 * If the proposed new status has a higher priority than the current status, change it.
915 * If status priority == new priority and both are zero, take
917 * If it is not zero, then we keep the first non-zero "error" that
920 if (priority > old_priority || (
921 priority == 0 && old_priority == 0)) {
922 Dmsg4(800, "Set new stat. old: %c,%d new: %c,%d\n",
923 JobStatus, old_priority, newJobStatus, priority);
924 JobStatus = newJobStatus; /* replace with new status */
927 if (oldJobStatus != JobStatus) {
928 Dmsg2(800, "leave setJobStatus old=%c new=%c\n", oldJobStatus, newJobStatus);
929 // generate_plugin_event(this, bEventStatusChange, NULL);
933 #ifdef TRACE_JCR_CHAIN
934 static int lock_count = 0;
940 #ifdef TRACE_JCR_CHAIN
941 static void b_lock_jcr_chain(const char *fname, int line)
943 static void lock_jcr_chain()
946 #ifdef TRACE_JCR_CHAIN
947 Dmsg3(dbglvl, "Lock jcr chain %d from %s:%d\n", ++lock_count, fname, line);
955 #ifdef TRACE_JCR_CHAIN
956 static void b_unlock_jcr_chain(const char *fname, int line)
958 static void unlock_jcr_chain()
961 #ifdef TRACE_JCR_CHAIN
962 Dmsg3(dbglvl, "Unlock jcr chain %d from %s:%d\n", lock_count--, fname, line);
968 * Start walk of jcr chain
969 * The proper way to walk the jcr chain is:
976 * It is possible to leave out the endeach_jcr(jcr), but
977 * in that case, the last jcr referenced must be explicitly
983 JCR *jcr_walk_start()
987 jcr = (JCR *)jcrs->first();
989 jcr->inc_use_count();
990 if (jcr->JobId > 0) {
991 Dmsg3(dbglvl, "Inc walk_start jid=%u use_count=%d Job=%s\n",
992 jcr->JobId, jcr->use_count(), jcr->Job);
1000 * Get next jcr from chain, and release current one
1002 JCR *jcr_walk_next(JCR *prev_jcr)
1007 jcr = (JCR *)jcrs->next(prev_jcr);
1009 jcr->inc_use_count();
1010 if (jcr->JobId > 0) {
1011 Dmsg3(dbglvl, "Inc walk_next jid=%u use_count=%d Job=%s\n",
1012 jcr->JobId, jcr->use_count(), jcr->Job);
1023 * Release last jcr referenced
1025 void jcr_walk_end(JCR *jcr)
1028 if (jcr->JobId > 0) {
1029 Dmsg3(dbglvl, "Free walk_end jid=%u use_count=%d Job=%s\n",
1030 jcr->JobId, jcr->use_count(), jcr->Job);
1037 * Return number of Jobs
1045 for (jcr = (JCR *)jcrs->first(); (jcr = (JCR *)jcrs->next(jcr)); ) {
1046 if (jcr->JobId > 0) {
1056 * Setup to call the timeout check routine every 30 seconds
1057 * This routine will check any timers that have been enabled.
1059 bool init_jcr_subsystem(void)
1061 watchdog_t *wd = new_watchdog();
1063 wd->one_shot = false;
1064 wd->interval = 30; /* FIXME: should be configurable somewhere, even
1065 if only with a #define */
1066 wd->callback = jcr_timeout_check;
1068 register_watchdog(wd);
1073 static void jcr_timeout_check(watchdog_t *self)
1079 Dmsg0(dbglvl, "Start JCR timeout checks\n");
1081 /* Walk through all JCRs checking if any one is
1082 * blocked for more than specified max time.
1085 Dmsg2(dbglvl, "jcr_timeout_check JobId=%u jcr=0x%x\n", jcr->JobId, jcr);
1086 if (jcr->JobId == 0) {
1089 bs = jcr->store_bsock;
1091 timer_start = bs->timer_start;
1092 if (timer_start && (watchdog_time - timer_start) > bs->timeout) {
1093 bs->timer_start = 0; /* turn off timer */
1094 bs->set_timed_out();
1095 Qmsg(jcr, M_ERROR, 0, _(
1096 "Watchdog sending kill after %d secs to thread stalled reading Storage daemon.\n"),
1097 (int)(watchdog_time - timer_start));
1098 jcr->my_thread_send_signal(TIMEOUT_SIGNAL);
1101 bs = jcr->file_bsock;
1103 timer_start = bs->timer_start;
1104 if (timer_start && (watchdog_time - timer_start) > bs->timeout) {
1105 bs->timer_start = 0; /* turn off timer */
1106 bs->set_timed_out();
1107 Qmsg(jcr, M_ERROR, 0, _(
1108 "Watchdog sending kill after %d secs to thread stalled reading File daemon.\n"),
1109 (int)(watchdog_time - timer_start));
1110 jcr->my_thread_send_signal(TIMEOUT_SIGNAL);
1113 bs = jcr->dir_bsock;
1115 timer_start = bs->timer_start;
1116 if (timer_start && (watchdog_time - timer_start) > bs->timeout) {
1117 bs->timer_start = 0; /* turn off timer */
1118 bs->set_timed_out();
1119 Qmsg(jcr, M_ERROR, 0, _(
1120 "Watchdog sending kill after %d secs to thread stalled reading Director.\n"),
1121 (int)(watchdog_time - timer_start));
1122 jcr->my_thread_send_signal(TIMEOUT_SIGNAL);
1128 Dmsg0(dbglvl, "Finished JCR timeout checks\n");
1132 * Return next JobId from comma separated list
1135 * 1 if next JobId returned
1136 * 0 if no more JobIds are in list
1137 * -1 there is an error
1139 int get_next_jobid_from_list(char **p, uint32_t *JobId)
1141 const int maxlen = 30;
1142 char jobid[maxlen+1];
1146 for (int i=0; i<maxlen; i++) {
1149 } else if (*q == ',') {
1156 if (jobid[0] == 0) {
1158 } else if (!is_a_number(jobid)) {
1159 return -1; /* error */
1162 *JobId = str_to_int64(jobid);
1167 * Timeout signal comes here
1169 extern "C" void timeout_handler(int sig)
1171 return; /* thus interrupting the function */
1174 /* Used to display specific daemon information after a fatal signal
1175 * (like BDB in the director)
1177 #define MAX_DBG_HOOK 10
1178 static dbg_jcr_hook_t *dbg_jcr_hooks[MAX_DBG_HOOK];
1179 static int dbg_jcr_handler_count;
1181 void dbg_jcr_add_hook(dbg_jcr_hook_t *hook)
1183 ASSERT(dbg_jcr_handler_count < MAX_DBG_HOOK);
1184 dbg_jcr_hooks[dbg_jcr_handler_count++] = hook;
1187 /* on win32, the pthread_t is a struct, so we don't display it */
1188 #ifdef HAVE_MINGW_W64
1189 # define get_threadid(a) (void *)0
1191 # define get_threadid(a) (void *)(a)
1196 * This function should be used ONLY after a fatal signal. We walk through the
1197 * JCR chain without taking any lock; Bacula should not be running.
1199 void dbg_print_jcr(FILE *fp)
1201 char buf1[128], buf2[128], buf3[128], buf4[128];
1207 fprintf(fp, "Attempt to dump current JCRs. njcrs=%d\n", jcrs->size());
1209 for (JCR *jcr = (JCR *)jcrs->first(); jcr ; jcr = (JCR *)jcrs->next(jcr)) {
1210 fprintf(fp, "threadid=%p JobId=%d JobStatus=%c jcr=%p name=%s\n",
1211 get_threadid(jcr->my_thread_id), (int)jcr->JobId, jcr->JobStatus, jcr, jcr->Job);
1212 fprintf(fp, "\tuse_count=%i killable=%d\n",
1213 jcr->use_count(), jcr->is_killable());
1214 fprintf(fp, "\tJobType=%c JobLevel=%c\n",
1215 jcr->getJobType(), jcr->getJobLevel());
1216 bstrftime(buf1, sizeof(buf1), jcr->sched_time);
1217 bstrftime(buf2, sizeof(buf2), jcr->start_time);
1218 bstrftime(buf3, sizeof(buf3), jcr->end_time);
1219 bstrftime(buf4, sizeof(buf4), jcr->wait_time);
1220 fprintf(fp, "\tsched_time=%s start_time=%s\n\tend_time=%s wait_time=%s\n",
1221 buf1, buf2, buf3, buf4);
1222 fprintf(fp, "\tdb=%p db_batch=%p batch_started=%i\n",
1223 jcr->db, jcr->db_batch, jcr->batch_started);
1226 * Call all the jcr debug hooks
1228 for(int i=0; i < dbg_jcr_handler_count; i++) {
1229 dbg_jcr_hook_t *hook = dbg_jcr_hooks[i];