2 Bacula(R) - The Network Backup Solution
4 Copyright (C) 2000-2016 Kern Sibbald
6 The original author of Bacula is Kern Sibbald, with contributions
7 from many others, a complete list can be found in the file AUTHORS.
9 You may use this file and others of this release according to the
10 license defined in the LICENSE file, which includes the Affero General
11 Public License, v3.0 ("AGPLv3") and some additional permissions and
12 terms pursuant to its AGPLv3 Section 7.
14 This notice must be preserved when any source code is
15 conveyed and/or propagated.
17 Bacula(R) is a registered trademark of Kern Sibbald.
20 * Manipulation routines for Job Control Records and
21 * handling of last_jobs_list.
23 * Kern E. Sibbald, December 2000
25 * These routines are thread safe.
27 * The job list routines were re-written in May 2005 to
28 * eliminate the global lock while traversing the list, and
29 * to use the dlist subroutines. The locking is now done
30 * on the list each time the list is modified or traversed.
31 * That is, it is "micro-locked" rather than globally locked.
32 * The result is that there is one lock/unlock for each entry
33 * in the list while traversing it rather than a single lock
34 * at the beginning of a traversal and one at the end. This
35 * incurs slightly more overhead, but effectively eliminates
36 * the possibility of race conditions. In addition, with the
37 * exception of the global locking of the list during the
38 * re-reading of the config file, no recursion is needed.
45 const int dbglvl = 3400;
47 /* External variables we reference */
49 /* External referenced functions */
50 void free_bregexps(alist *bregexps);
52 /* Forward referenced functions */
53 extern "C" void timeout_handler(int sig);
54 static void jcr_timeout_check(watchdog_t *self);
55 #ifdef TRACE_JCR_CHAIN
56 static void b_lock_jcr_chain(const char *filen, int line);
57 static void b_unlock_jcr_chain(const char *filen, int line);
58 #define lock_jcr_chain() b_lock_jcr_chain(__FILE__, __LINE__);
59 #define unlock_jcr_chain() b_unlock_jcr_chain(__FILE__, __LINE__);
61 static void lock_jcr_chain();
62 static void unlock_jcr_chain();
67 dlist *last_jobs = NULL;
68 const int max_last_jobs = 10;
70 static dlist *jcrs = NULL; /* JCR chain */
71 static pthread_mutex_t jcr_lock = PTHREAD_MUTEX_INITIALIZER;
73 static pthread_mutex_t job_start_mutex = PTHREAD_MUTEX_INITIALIZER;
75 static pthread_mutex_t last_jobs_mutex = PTHREAD_MUTEX_INITIALIZER;
77 static pthread_key_t jcr_key; /* Pointer to jcr for each thread */
79 pthread_once_t key_once = PTHREAD_ONCE_INIT;
81 static char Job_status[] = "Status Job=%s JobStatus=%d\n";
94 void init_last_jobs_list()
97 struct s_last_job *job_entry = NULL;
99 last_jobs = New(dlist(job_entry, &job_entry->link));
102 jcrs = New(dlist(jcr, &jcr->link));
106 void term_last_jobs_list()
109 lock_last_jobs_list();
110 while (!last_jobs->empty()) {
111 void *je = last_jobs->first();
112 last_jobs->remove(je);
117 unlock_last_jobs_list();
125 bool read_last_jobs_list(int fd, uint64_t addr)
127 struct s_last_job *je, job;
131 Dmsg1(100, "read_last_jobs seek to %d\n", (int)addr);
132 if (addr == 0 || lseek(fd, (boffset_t)addr, SEEK_SET) < 0) {
135 if (read(fd, &num, sizeof(num)) != sizeof(num)) {
138 Dmsg1(100, "Read num_items=%d\n", num);
139 if (num > 4 * max_last_jobs) { /* sanity check */
142 lock_last_jobs_list();
143 for ( ; num; num--) {
144 if (read(fd, &job, sizeof(job)) != sizeof(job)) {
146 Pmsg1(000, "Read job entry. ERR=%s\n", be.bstrerror());
151 je = (struct s_last_job *)malloc(sizeof(struct s_last_job));
152 memcpy((char *)je, (char *)&job, sizeof(job));
154 init_last_jobs_list();
156 last_jobs->append(je);
157 if (last_jobs->size() > max_last_jobs) {
158 je = (struct s_last_job *)last_jobs->first();
159 last_jobs->remove(je);
164 unlock_last_jobs_list();
168 uint64_t write_last_jobs_list(int fd, uint64_t addr)
170 struct s_last_job *je;
174 Dmsg1(100, "write_last_jobs seek to %d\n", (int)addr);
175 if (lseek(fd, (boffset_t)addr, SEEK_SET) < 0) {
179 lock_last_jobs_list();
180 /* First record is number of entries */
181 num = last_jobs->size();
182 if (write(fd, &num, sizeof(num)) != sizeof(num)) {
184 Pmsg1(000, "Error writing num_items: ERR=%s\n", be.bstrerror());
187 foreach_dlist(je, last_jobs) {
188 if (write(fd, je, sizeof(struct s_last_job)) != sizeof(struct s_last_job)) {
190 Pmsg1(000, "Error writing job: ERR=%s\n", be.bstrerror());
194 unlock_last_jobs_list();
196 /* Return current address */
197 stat = lseek(fd, 0, SEEK_CUR);
204 unlock_last_jobs_list();
208 void lock_last_jobs_list()
213 void unlock_last_jobs_list()
218 /* Get an ASCII representation of the Operation being performed as an English noun */
219 const char *JCR::get_OperationName()
225 return _("Verifying");
227 return _("Restoring");
229 return _("Archiving");
233 return _("Migration");
235 return _("Scanning");
237 return _("Unknown operation");
241 /* Get an ASCII representation of the Action being performed, either an English verb or adjective */
242 const char *JCR::get_ActionName(bool past)
248 return (past == true) ? _("verified") : _("verify");
250 return (past == true) ? _("restored") : _("restore");
252 return (past == true) ? _("archived") : _("archive");
254 return (past == true) ? _("copied") : _("copy");
256 return (past == true) ? _("migrated") : _("migrate");
258 return (past == true) ? _("scanned") : _("scan");
260 return _("unknown action");
273 if (m_JobLevel == L_VIRTUAL_FULL) {
284 * Push a subroutine address into the job end callback stack
286 void job_end_push(JCR *jcr, void job_end_cb(JCR *jcr,void *), void *ctx)
288 jcr->job_end_push.append((void *)job_end_cb);
289 jcr->job_end_push.append(ctx);
292 /* Pop each job_end subroutine and call it */
293 static void job_end_pop(JCR *jcr)
295 void (*job_end_cb)(JCR *jcr, void *ctx);
297 for (int i=jcr->job_end_push.size()-1; i > 0; ) {
298 ctx = jcr->job_end_push.get(i--);
299 job_end_cb = (void (*)(JCR *,void *))jcr->job_end_push.get(i--);
300 job_end_cb(jcr, ctx);
305 * Create thread key for thread specific data
307 void create_jcr_key()
309 int status = pthread_key_create(&jcr_key, NULL);
312 Jmsg1(NULL, M_ABORT, 0, _("pthread key create failed: ERR=%s\n"),
313 be.bstrerror(status));
318 * Create a Job Control Record and link it into JCR chain
319 * Returns newly allocated JCR
320 * Note, since each daemon has a different JCR, he passes
323 JCR *new_jcr(int size, JCR_free_HANDLER *daemon_free_jcr)
326 MQUEUE_ITEM *item = NULL;
329 Dmsg0(dbglvl, "Enter new_jcr\n");
330 status = pthread_once(&key_once, create_jcr_key);
333 Jmsg1(NULL, M_ABORT, 0, _("pthread_once failed. ERR=%s\n"), be.bstrerror(status));
335 jcr = (JCR *)malloc(size);
336 memset(jcr, 0, size);
337 /* Note for the director, this value is changed in jobq.c */
338 jcr->my_thread_id = pthread_self();
339 jcr->msg_queue = New(dlist(item, &item->link));
340 if ((status = pthread_mutex_init(&jcr->msg_queue_mutex, NULL)) != 0) {
342 Jmsg(NULL, M_ABORT, 0, _("Could not init msg_queue mutex. ERR=%s\n"),
343 be.bstrerror(status));
345 jcr->job_end_push.init(1, false);
346 jcr->sched_time = time(NULL);
347 jcr->initial_sched_time = jcr->sched_time;
348 jcr->daemon_free_jcr = daemon_free_jcr; /* plug daemon free routine */
350 jcr->inc_use_count();
351 jcr->VolumeName = get_pool_memory(PM_FNAME);
352 jcr->VolumeName[0] = 0;
353 jcr->errmsg = get_pool_memory(PM_MESSAGE);
355 jcr->comment = get_pool_memory(PM_FNAME);
357 /* Setup some dummy values */
358 bstrncpy(jcr->Job, "*System*", sizeof(jcr->Job));
360 jcr->setJobType(JT_SYSTEM); /* internal job until defined */
361 jcr->setJobLevel(L_NONE);
362 jcr->setJobStatus(JS_Created); /* ready to run */
364 struct sigaction sigtimer;
365 sigtimer.sa_flags = 0;
366 sigtimer.sa_handler = timeout_handler;
367 sigfillset(&sigtimer.sa_mask);
368 sigaction(TIMEOUT_SIGNAL, &sigtimer, NULL);
372 * Locking jobs is a global lock that is needed
373 * so that the Director can stop new jobs from being
374 * added to the jcr chain while it processes a new
375 * conf file and does the job_end_push().
380 jcrs = New(dlist(jcr, &jcr->link));
391 * Remove a JCR from the chain
392 * NOTE! The chain must be locked prior to calling
395 static void remove_jcr(JCR *jcr)
397 Dmsg0(dbglvl, "Enter remove_jcr\n");
399 Emsg0(M_ABORT, 0, _("NULL jcr.\n"));
402 Dmsg0(dbglvl, "Leave remove_jcr\n");
406 * Free stuff common to all JCRs. N.B. Be careful to include only
407 * generic stuff in the common part of the jcr.
409 static void free_common_jcr(JCR *jcr)
411 /* Uses jcr lock/unlock */
412 remove_jcr_from_tsd(jcr);
413 jcr->set_killable(false);
415 jcr->destroy_mutex();
417 if (jcr->msg_queue) {
418 delete jcr->msg_queue;
419 jcr->msg_queue = NULL;
420 pthread_mutex_destroy(&jcr->msg_queue_mutex);
423 /* do this after closing messages */
424 if (jcr->client_name) {
425 free_pool_memory(jcr->client_name);
426 jcr->client_name = NULL;
430 free_pool_memory(jcr->attr);
434 if (jcr->sd_auth_key) {
435 free(jcr->sd_auth_key);
436 jcr->sd_auth_key = NULL;
438 if (jcr->VolumeName) {
439 free_pool_memory(jcr->VolumeName);
440 jcr->VolumeName = NULL;
443 free_bsock(jcr->dir_bsock);
446 free_pool_memory(jcr->errmsg);
453 if (jcr->RegexWhere) {
454 free(jcr->RegexWhere);
455 jcr->RegexWhere = NULL;
457 if (jcr->where_bregexp) {
458 free_bregexps(jcr->where_bregexp);
459 delete jcr->where_bregexp;
460 jcr->where_bregexp = NULL;
462 if (jcr->cached_path) {
463 free_pool_memory(jcr->cached_path);
464 jcr->cached_path = NULL;
468 free_guid_list(jcr->id_list);
472 free_pool_memory(jcr->comment);
479 * Global routine to free a jcr
482 void b_free_jcr(const char *file, int line, JCR *jcr)
484 struct s_last_job *je;
486 Dmsg3(dbglvl, "Enter free_jcr jid=%u from %s:%d\n", jcr->JobId, file, line);
490 void free_jcr(JCR *jcr)
492 struct s_last_job *je;
494 Dmsg3(dbglvl, "Enter free_jcr jid=%u use_count=%d Job=%s\n",
495 jcr->JobId, jcr->use_count(), jcr->Job);
500 jcr->dec_use_count(); /* decrement use count */
501 ASSERT2(jcr->use_count() >= 0, "JCR use_count < 0");
502 // Jmsg2(jcr, M_ERROR, 0, _("JCR use_count=%d JobId=%d\n"),
503 // jcr->use_count(), jcr->JobId);
505 if (jcr->JobId > 0) {
506 Dmsg3(dbglvl, "Dec free_jcr jid=%u use_count=%d Job=%s\n",
507 jcr->JobId, jcr->use_count(), jcr->Job);
509 if (jcr->use_count() > 0) { /* if in use */
513 if (jcr->JobId > 0) {
514 Dmsg3(dbglvl, "remove jcr jid=%u use_count=%d Job=%s\n",
515 jcr->JobId, jcr->use_count(), jcr->Job);
518 remove_jcr(jcr); /* remove Jcr from chain */
521 dequeue_messages(jcr);
522 close_msg(jcr); /* close messages for this job */
523 job_end_pop(jcr); /* pop and call hooked routines */
525 Dmsg1(dbglvl, "End job=%d\n", jcr->JobId);
527 /* Keep some statistics */
528 switch (jcr->getJobType()) {
535 /* Keep list of last jobs, but not Console where JobId==0 */
536 if (jcr->JobId > 0) {
537 lock_last_jobs_list();
539 je = (struct s_last_job *)malloc(sizeof(struct s_last_job));
540 memset(je, 0, sizeof(struct s_last_job)); /* zero in case unset fields */
541 je->Errors = jcr->JobErrors;
542 je->JobType = jcr->getJobType();
543 je->JobId = jcr->JobId;
544 je->VolSessionId = jcr->VolSessionId;
545 je->VolSessionTime = jcr->VolSessionTime;
546 bstrncpy(je->Job, jcr->Job, sizeof(je->Job));
547 je->JobFiles = jcr->JobFiles;
548 je->JobBytes = jcr->JobBytes;
549 je->JobStatus = jcr->JobStatus;
550 je->JobLevel = jcr->getJobLevel();
551 je->start_time = jcr->start_time;
552 je->end_time = time(NULL);
555 init_last_jobs_list();
557 last_jobs->append(je);
558 if (last_jobs->size() > max_last_jobs) {
559 je = (struct s_last_job *)last_jobs->first();
560 last_jobs->remove(je);
563 unlock_last_jobs_list();
570 if (jcr->daemon_free_jcr) {
571 jcr->daemon_free_jcr(jcr); /* call daemon free routine */
574 free_common_jcr(jcr);
575 close_msg(NULL); /* flush any daemon messages */
576 Dmsg0(dbglvl, "Exit free_jcr\n");
580 * Remove jcr from thread specific data, but
581 * make sure it is us who are attached.
583 void remove_jcr_from_tsd(JCR *jcr)
585 JCR *tjcr = get_jcr_from_tsd();
587 set_jcr_in_tsd(INVALID_JCR);
591 void JCR::set_killable(bool killable)
594 my_thread_killable = killable;
599 * Put this jcr in the thread specific data
600 * if update_thread_info is true and the jcr is valid,
601 * we update the my_thread_id in the JCR
603 void set_jcr_in_tsd(JCR *jcr)
605 int status = pthread_setspecific(jcr_key, (void *)jcr);
608 Jmsg1(jcr, M_ABORT, 0, _("pthread_setspecific failed: ERR=%s\n"),
609 be.bstrerror(status));
613 void JCR::my_thread_send_signal(int sig)
615 lock_jcr_chain(); /* use global lock */
620 if (this->is_killable() &&
621 !pthread_equal(this->my_thread_id, pthread_self()))
623 Dmsg1(800, "Send kill to jid=%d\n", this->JobId);
624 pthread_kill(this->my_thread_id, sig);
625 this->exiting = true;
627 } else if (!this->is_killable()) {
628 Dmsg1(10, "Warning, can't send kill to jid=%d\n", this->JobId);
636 * Give me the jcr that is attached to this thread
638 JCR *get_jcr_from_tsd()
640 JCR *jcr = (JCR *)pthread_getspecific(jcr_key);
641 // printf("get_jcr_from_tsd: jcr=%p\n", jcr);
642 /* set any INVALID_JCR to NULL which the rest of Bacula understands */
643 if (jcr == INVALID_JCR) {
651 * Find which JobId corresponds to the current thread
653 uint32_t get_jobid_from_tsd()
657 jcr = get_jcr_from_tsd();
658 // printf("get_jobid_from_tsr: jcr=%p\n", jcr);
660 JobId = (uint32_t)jcr->JobId;
666 * Given a JobId, find the JCR
667 * Returns: jcr on success
670 JCR *get_jcr_by_id(uint32_t JobId)
675 if (jcr->JobId == JobId) {
676 jcr->inc_use_count();
677 Dmsg3(dbglvl, "Inc get_jcr jid=%u use_count=%d Job=%s\n",
678 jcr->JobId, jcr->use_count(), jcr->Job);
687 * Given a thread id, find the JobId
688 * Returns: JobId on success
691 uint32_t get_jobid_from_tid(pthread_t tid)
697 if (pthread_equal(jcr->my_thread_id, tid)) {
711 * Given a SessionId and SessionTime, find the JCR
712 * Returns: jcr on success
715 JCR *get_jcr_by_session(uint32_t SessionId, uint32_t SessionTime)
720 if (jcr->VolSessionId == SessionId &&
721 jcr->VolSessionTime == SessionTime) {
722 jcr->inc_use_count();
723 Dmsg3(dbglvl, "Inc get_jcr jid=%u use_count=%d Job=%s\n",
724 jcr->JobId, jcr->use_count(), jcr->Job);
734 * Given a Job, find the JCR
735 * compares on the number of characters in Job
736 * thus allowing partial matches.
737 * Returns: jcr on success
740 JCR *get_jcr_by_partial_name(char *Job)
750 if (strncmp(Job, jcr->Job, len) == 0) {
751 jcr->inc_use_count();
752 Dmsg3(dbglvl, "Inc get_jcr jid=%u use_count=%d Job=%s\n",
753 jcr->JobId, jcr->use_count(), jcr->Job);
763 * Given a Job, find the JCR
764 * requires an exact match of names.
765 * Returns: jcr on success
768 JCR *get_jcr_by_full_name(char *Job)
776 if (strcmp(jcr->Job, Job) == 0) {
777 jcr->inc_use_count();
778 Dmsg3(dbglvl, "Inc get_jcr jid=%u use_count=%d Job=%s\n",
779 jcr->JobId, jcr->use_count(), jcr->Job);
787 static void update_wait_time(JCR *jcr, int newJobStatus)
789 bool enter_in_waittime;
790 int oldJobStatus = jcr->JobStatus;
792 switch (newJobStatus) {
797 case JS_WaitStoreRes:
799 case JS_WaitClientRes:
801 case JS_WaitPriority:
802 enter_in_waittime = true;
805 enter_in_waittime = false; /* not a Wait situation */
810 * If we were previously waiting and are not any more
811 * we want to update the wait_time variable, which is
812 * the start of waiting.
814 switch (oldJobStatus) {
819 case JS_WaitStoreRes:
821 case JS_WaitClientRes:
823 case JS_WaitPriority:
824 if (!enter_in_waittime) { /* we get out the wait time */
825 jcr->wait_time_sum += (time(NULL) - jcr->wait_time);
830 /* if wait state is new, we keep current time for watchdog MaxWaitTime */
832 if (enter_in_waittime) {
833 jcr->wait_time = time(NULL);
840 * Priority runs from 0 (lowest) to 10 (highest)
842 static int get_status_priority(int JobStatus)
849 case JS_ErrorTerminated:
865 * Send Job status to Director
867 bool JCR::sendJobStatus()
870 return dir_bsock->fsend(Job_status, Job, JobStatus);
876 * Set and send Job status to Director
878 bool JCR::sendJobStatus(int aJobStatus)
880 if (!is_JobStatus(aJobStatus)) {
881 setJobStatus(aJobStatus);
883 return dir_bsock->fsend(Job_status, Job, JobStatus);
889 void JCR::setJobStarted()
892 job_started_time = time(NULL);
895 void JCR::setJobStatus(int newJobStatus)
897 int priority, old_priority;
898 int oldJobStatus = JobStatus;
899 priority = get_status_priority(newJobStatus);
900 old_priority = get_status_priority(oldJobStatus);
902 Dmsg2(800, "set_jcr_job_status(%s, %c)\n", Job, newJobStatus);
904 /* Update wait_time depending on newJobStatus and oldJobStatus */
905 update_wait_time(this, newJobStatus);
908 * For a set of errors, ... keep the current status
909 * so it isn't lost. For all others, set it.
911 Dmsg2(800, "OnEntry JobStatus=%c newJobstatus=%c\n", oldJobStatus, newJobStatus);
913 * If status priority is > than proposed new status, change it.
914 * If status priority == new priority and both are zero, take
916 * If it is not zero, then we keep the first non-zero "error" that
919 if (priority > old_priority || (
920 priority == 0 && old_priority == 0)) {
921 Dmsg4(800, "Set new stat. old: %c,%d new: %c,%d\n",
922 JobStatus, old_priority, newJobStatus, priority);
923 JobStatus = newJobStatus; /* replace with new status */
926 if (oldJobStatus != JobStatus) {
927 Dmsg2(800, "leave setJobStatus old=%c new=%c\n", oldJobStatus, newJobStatus);
928 // generate_plugin_event(this, bEventStatusChange, NULL);
932 #ifdef TRACE_JCR_CHAIN
933 static int lock_count = 0;
939 #ifdef TRACE_JCR_CHAIN
940 static void b_lock_jcr_chain(const char *fname, int line)
942 static void lock_jcr_chain()
945 #ifdef TRACE_JCR_CHAIN
946 Dmsg3(dbglvl, "Lock jcr chain %d from %s:%d\n", ++lock_count, fname, line);
954 #ifdef TRACE_JCR_CHAIN
955 static void b_unlock_jcr_chain(const char *fname, int line)
957 static void unlock_jcr_chain()
960 #ifdef TRACE_JCR_CHAIN
961 Dmsg3(dbglvl, "Unlock jcr chain %d from %s:%d\n", lock_count--, fname, line);
967 * Start walk of jcr chain
968 * The proper way to walk the jcr chain is:
975 * It is possible to leave out the endeach_jcr(jcr), but
976 * in that case, the last jcr referenced must be explicitly
982 JCR *jcr_walk_start()
986 jcr = (JCR *)jcrs->first();
988 jcr->inc_use_count();
989 if (jcr->JobId > 0) {
990 Dmsg3(dbglvl, "Inc walk_start jid=%u use_count=%d Job=%s\n",
991 jcr->JobId, jcr->use_count(), jcr->Job);
999 * Get next jcr from chain, and release current one
1001 JCR *jcr_walk_next(JCR *prev_jcr)
1006 jcr = (JCR *)jcrs->next(prev_jcr);
1008 jcr->inc_use_count();
1009 if (jcr->JobId > 0) {
1010 Dmsg3(dbglvl, "Inc walk_next jid=%u use_count=%d Job=%s\n",
1011 jcr->JobId, jcr->use_count(), jcr->Job);
1022 * Release last jcr referenced
1024 void jcr_walk_end(JCR *jcr)
1027 if (jcr->JobId > 0) {
1028 Dmsg3(dbglvl, "Free walk_end jid=%u use_count=%d Job=%s\n",
1029 jcr->JobId, jcr->use_count(), jcr->Job);
1036 * Return number of Jobs
1044 for (jcr = (JCR *)jcrs->first(); (jcr = (JCR *)jcrs->next(jcr)); ) {
1045 if (jcr->JobId > 0) {
1055 * Setup to call the timeout check routine every 30 seconds
1056 * This routine will check any timers that have been enabled.
1058 bool init_jcr_subsystem(void)
1060 watchdog_t *wd = new_watchdog();
1062 wd->one_shot = false;
1063 wd->interval = 30; /* FIXME: should be configurable somewhere, even
1064 if only with a #define */
1065 wd->callback = jcr_timeout_check;
1067 register_watchdog(wd);
1072 static void jcr_timeout_check(watchdog_t *self)
1078 Dmsg0(dbglvl, "Start JCR timeout checks\n");
1080 /* Walk through all JCRs checking if any one is
1081 * blocked for more than specified max time.
1084 Dmsg2(dbglvl, "jcr_timeout_check JobId=%u jcr=0x%x\n", jcr->JobId, jcr);
1085 if (jcr->JobId == 0) {
1088 bs = jcr->store_bsock;
1090 timer_start = bs->timer_start;
1091 if (timer_start && (watchdog_time - timer_start) > bs->timeout) {
1092 bs->timer_start = 0; /* turn off timer */
1093 bs->set_timed_out();
1094 Qmsg(jcr, M_ERROR, 0, _(
1095 "Watchdog sending kill after %d secs to thread stalled reading Storage daemon.\n"),
1096 (int)(watchdog_time - timer_start));
1097 jcr->my_thread_send_signal(TIMEOUT_SIGNAL);
1100 bs = jcr->file_bsock;
1102 timer_start = bs->timer_start;
1103 if (timer_start && (watchdog_time - timer_start) > bs->timeout) {
1104 bs->timer_start = 0; /* turn off timer */
1105 bs->set_timed_out();
1106 Qmsg(jcr, M_ERROR, 0, _(
1107 "Watchdog sending kill after %d secs to thread stalled reading File daemon.\n"),
1108 (int)(watchdog_time - timer_start));
1109 jcr->my_thread_send_signal(TIMEOUT_SIGNAL);
1112 bs = jcr->dir_bsock;
1114 timer_start = bs->timer_start;
1115 if (timer_start && (watchdog_time - timer_start) > bs->timeout) {
1116 bs->timer_start = 0; /* turn off timer */
1117 bs->set_timed_out();
1118 Qmsg(jcr, M_ERROR, 0, _(
1119 "Watchdog sending kill after %d secs to thread stalled reading Director.\n"),
1120 (int)(watchdog_time - timer_start));
1121 jcr->my_thread_send_signal(TIMEOUT_SIGNAL);
1127 Dmsg0(dbglvl, "Finished JCR timeout checks\n");
1131 * Return next JobId from comma separated list
1134 * 1 if next JobId returned
1135 * 0 if no more JobIds are in list
1136 * -1 there is an error
1138 int get_next_jobid_from_list(char **p, uint32_t *JobId)
1140 const int maxlen = 30;
1141 char jobid[maxlen+1];
1145 for (int i=0; i<maxlen; i++) {
1148 } else if (*q == ',') {
1155 if (jobid[0] == 0) {
1157 } else if (!is_a_number(jobid)) {
1158 return -1; /* error */
1161 *JobId = str_to_int64(jobid);
1166 * Timeout signal comes here
1168 extern "C" void timeout_handler(int sig)
1170 return; /* thus interrupting the function */
1173 /* Used to display specific daemon information after a fatal signal
1174 * (like BDB in the director)
1176 #define MAX_DBG_HOOK 10
1177 static dbg_jcr_hook_t *dbg_jcr_hooks[MAX_DBG_HOOK];
1178 static int dbg_jcr_handler_count;
1180 void dbg_jcr_add_hook(dbg_jcr_hook_t *hook)
1182 ASSERT(dbg_jcr_handler_count < MAX_DBG_HOOK);
1183 dbg_jcr_hooks[dbg_jcr_handler_count++] = hook;
1186 /* on win32, the pthread_t is a struct, so we don't display it */
1187 #ifdef HAVE_MINGW_W64
1188 # define get_threadid(a) (void *)0
1190 # define get_threadid(a) (void *)(a)
1195 * This function should be used ONLY after a fatal signal. We walk through the
1196 * JCR chain without doing any lock, Bacula should not be running.
1198 void dbg_print_jcr(FILE *fp)
1200 char buf1[128], buf2[128], buf3[128], buf4[128];
1206 fprintf(fp, "Attempt to dump current JCRs. njcrs=%d\n", jcrs->size());
1208 for (JCR *jcr = (JCR *)jcrs->first(); jcr ; jcr = (JCR *)jcrs->next(jcr)) {
1209 fprintf(fp, "threadid=%p JobId=%d JobStatus=%c jcr=%p name=%s\n",
1210 get_threadid(jcr->my_thread_id), (int)jcr->JobId, jcr->JobStatus, jcr, jcr->Job);
1211 fprintf(fp, "\tuse_count=%i killable=%d\n",
1212 jcr->use_count(), jcr->is_killable());
1213 fprintf(fp, "\tJobType=%c JobLevel=%c\n",
1214 jcr->getJobType(), jcr->getJobLevel());
1215 bstrftime(buf1, sizeof(buf1), jcr->sched_time);
1216 bstrftime(buf2, sizeof(buf2), jcr->start_time);
1217 bstrftime(buf3, sizeof(buf3), jcr->end_time);
1218 bstrftime(buf4, sizeof(buf4), jcr->wait_time);
1219 fprintf(fp, "\tsched_time=%s start_time=%s\n\tend_time=%s wait_time=%s\n",
1220 buf1, buf2, buf3, buf4);
1221 fprintf(fp, "\tdb=%p db_batch=%p batch_started=%i\n",
1222 jcr->db, jcr->db_batch, jcr->batch_started);
1225 * Call all the jcr debug hooks
1227 for(int i=0; i < dbg_jcr_handler_count; i++) {
1228 dbg_jcr_hook_t *hook = dbg_jcr_hooks[i];