2 Bacula® - The Network Backup Solution
4 Copyright (C) 2000-2010 Free Software Foundation Europe e.V.
6 The main author of Bacula is Kern Sibbald, with contributions from
7 many others, a complete list can be found in the file AUTHORS.
8 This program is Free Software; you can redistribute it and/or
9 modify it under the terms of version two of the GNU General Public
10 License as published by the Free Software Foundation and included
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23 Bacula® is a registered trademark of Kern Sibbald.
24 The licensor of Bacula is the Free Software Foundation Europe
25 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
26 Switzerland, email:ftf@fsfeurope.org.
29 * Manipulation routines for Job Control Records and
30 * handling of last_jobs_list.
32 * Kern E. Sibbald, December 2000
34 * These routines are thread safe.
36 * The job list routines were re-written in May 2005 to
37 * eliminate the global lock while traversing the list, and
38 * to use the dlist subroutines. The locking is now done
39 * on the list each time the list is modified or traversed.
40 * That is, it is "micro-locked" rather than globally locked.
41 * The result is that there is one lock/unlock for each entry
42 * in the list while traversing it rather than a single lock
43 * at the beginning of a traversal and one at the end. This
44 * incurs slightly more overhead, but effectively eliminates
45 * the possibility of race conditions. In addition, with the
46 * exception of the global locking of the list during the
47 * re-reading of the config file, no recursion is needed.
54 const int dbglvl = 3400;
56 /* External variables we reference */
58 /* External referenced functions */
59 void free_bregexps(alist *bregexps);
61 /* Forward referenced functions */
62 extern "C" void timeout_handler(int sig);
63 static void jcr_timeout_check(watchdog_t *self);
64 #ifdef TRACE_JCR_CHAIN
65 static void b_lock_jcr_chain(const char *filen, int line);
66 static void b_unlock_jcr_chain(const char *filen, int line);
67 #define lock_jcr_chain() b_lock_jcr_chain(__FILE__, __LINE__);
68 #define unlock_jcr_chain() b_unlock_jcr_chain(__FILE__, __LINE__);
70 static void lock_jcr_chain();
71 static void unlock_jcr_chain();
76 dlist *last_jobs = NULL;
77 const int max_last_jobs = 10;
79 static dlist *jcrs = NULL; /* JCR chain */
80 static pthread_mutex_t jcr_lock = PTHREAD_MUTEX_INITIALIZER;
82 static pthread_mutex_t job_start_mutex = PTHREAD_MUTEX_INITIALIZER;
84 static pthread_mutex_t last_jobs_mutex = PTHREAD_MUTEX_INITIALIZER;
86 static pthread_key_t jcr_key; /* Pointer to jcr for each thread */
88 pthread_once_t key_once = PTHREAD_ONCE_INIT;
101 void init_last_jobs_list()
104 struct s_last_job *job_entry = NULL;
106 last_jobs = New(dlist(job_entry, &job_entry->link));
109 jcrs = New(dlist(jcr, &jcr->link));
113 void term_last_jobs_list()
116 lock_last_jobs_list();
117 while (!last_jobs->empty()) {
118 void *je = last_jobs->first();
119 last_jobs->remove(je);
124 unlock_last_jobs_list();
132 bool read_last_jobs_list(int fd, uint64_t addr)
134 struct s_last_job *je, job;
138 Dmsg1(100, "read_last_jobs seek to %d\n", (int)addr);
139 if (addr == 0 || lseek(fd, (boffset_t)addr, SEEK_SET) < 0) {
142 if (read(fd, &num, sizeof(num)) != sizeof(num)) {
145 Dmsg1(100, "Read num_items=%d\n", num);
146 if (num > 4 * max_last_jobs) { /* sanity check */
149 lock_last_jobs_list();
150 for ( ; num; num--) {
151 if (read(fd, &job, sizeof(job)) != sizeof(job)) {
153 Pmsg1(000, "Read job entry. ERR=%s\n", be.bstrerror());
158 je = (struct s_last_job *)malloc(sizeof(struct s_last_job));
159 memcpy((char *)je, (char *)&job, sizeof(job));
161 init_last_jobs_list();
163 last_jobs->append(je);
164 if (last_jobs->size() > max_last_jobs) {
165 je = (struct s_last_job *)last_jobs->first();
166 last_jobs->remove(je);
171 unlock_last_jobs_list();
175 uint64_t write_last_jobs_list(int fd, uint64_t addr)
177 struct s_last_job *je;
181 Dmsg1(100, "write_last_jobs seek to %d\n", (int)addr);
182 if (lseek(fd, (boffset_t)addr, SEEK_SET) < 0) {
186 lock_last_jobs_list();
187 /* First record is number of entries */
188 num = last_jobs->size();
189 if (write(fd, &num, sizeof(num)) != sizeof(num)) {
191 Pmsg1(000, "Error writing num_items: ERR=%s\n", be.bstrerror());
194 foreach_dlist(je, last_jobs) {
195 if (write(fd, je, sizeof(struct s_last_job)) != sizeof(struct s_last_job)) {
197 Pmsg1(000, "Error writing job: ERR=%s\n", be.bstrerror());
201 unlock_last_jobs_list();
203 /* Return current address */
204 stat = lseek(fd, 0, SEEK_CUR);
211 unlock_last_jobs_list();
215 void lock_last_jobs_list()
220 void unlock_last_jobs_list()
225 /* Get an ASCII representation of the Operation being performed as an English noun */
226 const char *JCR::get_OperationName()
232 return _("Verifying");
234 return _("Restoring");
236 return _("Archiving");
240 return _("Migration");
242 return _("Scanning");
244 return _("Unknown operation");
248 /* Get an ASCII representation of the Action being performed, either an English verb or adjective */
249 const char *JCR::get_ActionName(bool past)
255 return (past == true) ? _("verified") : _("verify");
257 return (past == true) ? _("restored") : _("restore");
259 return (past == true) ? _("archived") : _("archive");
261 return (past == true) ? _("copied") : _("copy");
263 return (past == true) ? _("migrated") : _("migrate");
265 return (past == true) ? _("scanned") : _("scan");
267 return _("unknown action");
280 if (m_JobLevel == L_VIRTUAL_FULL) {
291 * Push a subroutine address into the job end callback stack
293 void job_end_push(JCR *jcr, void job_end_cb(JCR *jcr,void *), void *ctx)
295 jcr->job_end_push.append((void *)job_end_cb);
296 jcr->job_end_push.append(ctx);
299 /* Pop each job_end subroutine and call it */
300 static void job_end_pop(JCR *jcr)
302 void (*job_end_cb)(JCR *jcr, void *ctx);
304 for (int i=jcr->job_end_push.size()-1; i > 0; ) {
305 ctx = jcr->job_end_push.get(i--);
306 job_end_cb = (void (*)(JCR *,void *))jcr->job_end_push.get(i--);
307 job_end_cb(jcr, ctx);
312 * Create thread key for thread specific data
314 void create_jcr_key()
316 int status = pthread_key_create(&jcr_key, NULL);
319 Jmsg1(NULL, M_ABORT, 0, _("pthread key create failed: ERR=%s\n"),
320 be.bstrerror(status));
325 * Create a Job Control Record and link it into JCR chain
326 * Returns newly allocated JCR
327 * Note, since each daemon has a different JCR, he passes
330 JCR *new_jcr(int size, JCR_free_HANDLER *daemon_free_jcr)
333 MQUEUE_ITEM *item = NULL;
334 struct sigaction sigtimer;
337 Dmsg0(dbglvl, "Enter new_jcr\n");
338 status = pthread_once(&key_once, create_jcr_key);
341 Jmsg1(NULL, M_ABORT, 0, _("pthread_once failed. ERR=%s\n"), be.bstrerror(status));
343 jcr = (JCR *)malloc(size);
344 memset(jcr, 0, size);
345 jcr->msg_queue = New(dlist(item, &item->link));
346 if ((status = pthread_mutex_init(&jcr->msg_queue_mutex, NULL)) != 0) {
348 Jmsg(NULL, M_ABORT, 0, _("Could not init msg_queue mutex. ERR=%s\n"),
349 be.bstrerror(status));
351 jcr->job_end_push.init(1, false);
352 jcr->sched_time = time(NULL);
353 jcr->daemon_free_jcr = daemon_free_jcr; /* plug daemon free routine */
355 jcr->inc_use_count();
356 jcr->VolumeName = get_pool_memory(PM_FNAME);
357 jcr->VolumeName[0] = 0;
358 jcr->errmsg = get_pool_memory(PM_MESSAGE);
360 jcr->comment = get_pool_memory(PM_FNAME);
362 /* Setup some dummy values */
363 bstrncpy(jcr->Job, "*System*", sizeof(jcr->Job));
365 jcr->setJobType(JT_SYSTEM); /* internal job until defined */
366 jcr->setJobLevel(L_NONE);
367 jcr->setJobStatus(JS_Created); /* ready to run */
369 sigtimer.sa_flags = 0;
370 sigtimer.sa_handler = timeout_handler;
371 sigfillset(&sigtimer.sa_mask);
372 sigaction(TIMEOUT_SIGNAL, &sigtimer, NULL);
375 * Locking jobs is a global lock that is needed
376 * so that the Director can stop new jobs from being
377 * added to the jcr chain while it processes a new
378 * conf file and does the job_end_push().
383 jcrs = New(dlist(jcr, &jcr->link));
394 * Remove a JCR from the chain
395 * NOTE! The chain must be locked prior to calling
398 static void remove_jcr(JCR *jcr)
400 Dmsg0(dbglvl, "Enter remove_jcr\n");
402 Emsg0(M_ABORT, 0, _("NULL jcr.\n"));
405 Dmsg0(dbglvl, "Leave remove_jcr\n");
409 * Free stuff common to all JCRs. N.B. Be careful to include only
410 * generic stuff in the common part of the jcr.
412 static void free_common_jcr(JCR *jcr)
414 /* Uses jcr lock/unlock */
415 remove_jcr_from_tsd(jcr);
417 jcr->destroy_mutex();
419 if (jcr->msg_queue) {
420 delete jcr->msg_queue;
421 jcr->msg_queue = NULL;
422 pthread_mutex_destroy(&jcr->msg_queue_mutex);
424 close_msg(jcr); /* close messages for this job */
426 /* do this after closing messages */
427 if (jcr->client_name) {
428 free_pool_memory(jcr->client_name);
429 jcr->client_name = NULL;
433 free_pool_memory(jcr->attr);
437 if (jcr->sd_auth_key) {
438 free(jcr->sd_auth_key);
439 jcr->sd_auth_key = NULL;
441 if (jcr->VolumeName) {
442 free_pool_memory(jcr->VolumeName);
443 jcr->VolumeName = NULL;
446 if (jcr->dir_bsock) {
447 bnet_close(jcr->dir_bsock);
448 jcr->dir_bsock = NULL;
451 free_pool_memory(jcr->errmsg);
458 if (jcr->RegexWhere) {
459 free(jcr->RegexWhere);
460 jcr->RegexWhere = NULL;
462 if (jcr->where_bregexp) {
463 free_bregexps(jcr->where_bregexp);
464 delete jcr->where_bregexp;
465 jcr->where_bregexp = NULL;
467 if (jcr->cached_path) {
468 free_pool_memory(jcr->cached_path);
469 jcr->cached_path = NULL;
473 free_guid_list(jcr->id_list);
477 free_pool_memory(jcr->comment);
484 * Global routine to free a jcr
487 void b_free_jcr(const char *file, int line, JCR *jcr)
489 struct s_last_job *je;
491 Dmsg3(dbglvl, "Enter free_jcr jid=%u from %s:%d\n", jcr->JobId, file, line);
495 void free_jcr(JCR *jcr)
497 struct s_last_job *je;
499 Dmsg3(dbglvl, "Enter free_jcr jid=%u use_count=%d Job=%s\n",
500 jcr->JobId, jcr->use_count(), jcr->Job);
505 jcr->dec_use_count(); /* decrement use count */
506 if (jcr->use_count() < 0) {
507 Jmsg2(jcr, M_ERROR, 0, _("JCR use_count=%d JobId=%d\n"),
508 jcr->use_count(), jcr->JobId);
510 if (jcr->JobId > 0) {
511 Dmsg3(dbglvl, "Dec free_jcr jid=%u use_count=%d Job=%s\n",
512 jcr->JobId, jcr->use_count(), jcr->Job);
514 if (jcr->use_count() > 0) { /* if in use */
518 if (jcr->JobId > 0) {
519 Dmsg3(dbglvl, "remove jcr jid=%u use_count=%d Job=%s\n",
520 jcr->JobId, jcr->use_count(), jcr->Job);
522 remove_jcr(jcr); /* remove Jcr from chain */
525 dequeue_messages(jcr);
526 job_end_pop(jcr); /* pop and call hooked routines */
528 Dmsg1(dbglvl, "End job=%d\n", jcr->JobId);
530 /* Keep some statistics */
531 switch (jcr->getJobType()) {
538 /* Keep list of last jobs, but not Console where JobId==0 */
539 if (jcr->JobId > 0) {
540 lock_last_jobs_list();
542 je = (struct s_last_job *)malloc(sizeof(struct s_last_job));
543 memset(je, 0, sizeof(struct s_last_job)); /* zero in case unset fields */
544 je->Errors = jcr->JobErrors;
545 je->JobType = jcr->getJobType();
546 je->JobId = jcr->JobId;
547 je->VolSessionId = jcr->VolSessionId;
548 je->VolSessionTime = jcr->VolSessionTime;
549 bstrncpy(je->Job, jcr->Job, sizeof(je->Job));
550 je->JobFiles = jcr->JobFiles;
551 je->JobBytes = jcr->JobBytes;
552 je->JobStatus = jcr->JobStatus;
553 je->JobLevel = jcr->getJobLevel();
554 je->start_time = jcr->start_time;
555 je->end_time = time(NULL);
558 init_last_jobs_list();
560 last_jobs->append(je);
561 if (last_jobs->size() > max_last_jobs) {
562 je = (struct s_last_job *)last_jobs->first();
563 last_jobs->remove(je);
566 unlock_last_jobs_list();
573 if (jcr->daemon_free_jcr) {
574 jcr->daemon_free_jcr(jcr); /* call daemon free routine */
577 free_common_jcr(jcr);
578 close_msg(NULL); /* flush any daemon messages */
579 garbage_collect_memory_pool();
580 Dmsg0(dbglvl, "Exit free_jcr\n");
584 * Remove jcr from thread-specific data, but
585 * make sure it is us who are attached.
587 void remove_jcr_from_tsd(JCR *jcr)
589 JCR *tjcr = get_jcr_from_tsd();
592 jcr->my_thread_running = false;
593 memset(&jcr->my_thread_id, 0, sizeof(jcr->my_thread_id));
595 set_jcr_in_tsd(INVALID_JCR);
600 * Put this jcr in the thread-specific data.
601 * If update_thread_info is true and the jcr is valid,
602 * we update the my_thread_id in the JCR.
604 void set_jcr_in_tsd(JCR *jcr, bool update_thread_info)
606 int status = pthread_setspecific(jcr_key, (void *)jcr);
609 Jmsg1(jcr, M_ABORT, 0, _("pthread_setspecific failed: ERR=%s\n"),
610 be.bstrerror(status));
613 /* We explicitly ask to set a jcr in tsd, we can update jcr->my_thread
615 if (update_thread_info && jcr && jcr != INVALID_JCR) {
616 Dmsg2(100, "setting my_thread_stuffs 0x%p => 0x%p\n",
617 jcr->my_thread_id, pthread_self());
619 //ASSERT(jcr->my_thread_running == false);
620 jcr->my_thread_id = pthread_self();
621 jcr->my_thread_running = true;
626 void JCR::my_thread_send_signal(int sig)
629 if ( this->my_thread_running
630 && !pthread_equal(this->my_thread_id, pthread_self()))
632 Dmsg1(800, "Send kill to jid=%d\n", this->JobId);
633 pthread_kill(this->my_thread_id, sig);
635 } else if (!this->my_thread_running) {
636 Dmsg1(10, "Warning, can't send kill to jid=%d\n", this->JobId);
642 * Give me the jcr that is attached to this thread
644 JCR *get_jcr_from_tsd()
646 JCR *jcr = (JCR *)pthread_getspecific(jcr_key);
647 // printf("get_jcr_from_tsd: jcr=%p\n", jcr);
648 /* set any INVALID_JCR to NULL which the rest of Bacula understands */
649 if (jcr == INVALID_JCR) {
657 * Find which JobId corresponds to the current thread
659 uint32_t get_jobid_from_tsd()
663 jcr = get_jcr_from_tsd();
664 // printf("get_jobid_from_tsr: jcr=%p\n", jcr);
666 JobId = (uint32_t)jcr->JobId;
672 * Given a JobId, find the JCR
673 * Returns: jcr on success
676 JCR *get_jcr_by_id(uint32_t JobId)
681 if (jcr->JobId == JobId) {
682 jcr->inc_use_count();
683 Dmsg3(dbglvl, "Inc get_jcr jid=%u use_count=%d Job=%s\n",
684 jcr->JobId, jcr->use_count(), jcr->Job);
693 * Given a thread id, find the JobId
694 * Returns: JobId on success
697 uint32_t get_jobid_from_tid(pthread_t tid)
703 if (pthread_equal(jcr->my_thread_id, tid)) {
717 * Given a SessionId and SessionTime, find the JCR
718 * Returns: jcr on success
721 JCR *get_jcr_by_session(uint32_t SessionId, uint32_t SessionTime)
726 if (jcr->VolSessionId == SessionId &&
727 jcr->VolSessionTime == SessionTime) {
728 jcr->inc_use_count();
729 Dmsg3(dbglvl, "Inc get_jcr jid=%u use_count=%d Job=%s\n",
730 jcr->JobId, jcr->use_count(), jcr->Job);
740 * Given a Job, find the JCR
741 * compares on the number of characters in Job
742 * thus allowing partial matches.
743 * Returns: jcr on success
746 JCR *get_jcr_by_partial_name(char *Job)
756 if (strncmp(Job, jcr->Job, len) == 0) {
757 jcr->inc_use_count();
758 Dmsg3(dbglvl, "Inc get_jcr jid=%u use_count=%d Job=%s\n",
759 jcr->JobId, jcr->use_count(), jcr->Job);
769 * Given a Job, find the JCR
770 * requires an exact match of names.
771 * Returns: jcr on success
774 JCR *get_jcr_by_full_name(char *Job)
782 if (strcmp(jcr->Job, Job) == 0) {
783 jcr->inc_use_count();
784 Dmsg3(dbglvl, "Inc get_jcr jid=%u use_count=%d Job=%s\n",
785 jcr->JobId, jcr->use_count(), jcr->Job);
793 static void update_wait_time(JCR *jcr, int newJobStatus)
795 bool enter_in_waittime;
796 int oldJobStatus = jcr->JobStatus;
798 switch (newJobStatus) {
803 case JS_WaitStoreRes:
805 case JS_WaitClientRes:
807 case JS_WaitPriority:
808 enter_in_waittime = true;
811 enter_in_waittime = false; /* not a Wait situation */
816 * If we were previously waiting and are not any more
817 * we want to update the wait_time variable, which is
818 * the start of waiting.
820 switch (oldJobStatus) {
825 case JS_WaitStoreRes:
827 case JS_WaitClientRes:
829 case JS_WaitPriority:
830 if (!enter_in_waittime) { /* we get out the wait time */
831 jcr->wait_time_sum += (time(NULL) - jcr->wait_time);
836 /* if wait state is new, we keep current time for watchdog MaxWaitTime */
838 if (enter_in_waittime) {
839 jcr->wait_time = time(NULL);
846 * Priority runs from 0 (lowest) to 10 (highest)
848 static int get_status_priority(int JobStatus)
852 case JS_ErrorTerminated:
869 void set_jcr_job_status(JCR *jcr, int JobStatus)
871 jcr->setJobStatus(JobStatus);
874 void JCR::setJobStatus(int newJobStatus)
877 int priority, old_priority;
878 int oldJobStatus = jcr->JobStatus;
879 priority = get_status_priority(newJobStatus);
880 old_priority = get_status_priority(oldJobStatus);
882 Dmsg2(800, "set_jcr_job_status(%s, %c)\n", Job, newJobStatus);
884 /* Update wait_time depending on newJobStatus and oldJobStatus */
885 update_wait_time(jcr, newJobStatus);
888 * For a set of errors, ... keep the current status
889 * so it isn't lost. For all others, set it.
891 Dmsg2(800, "OnEntry JobStatus=%c newJobstatus=%c\n", oldJobStatus, newJobStatus);
893 * If the priority of the proposed new status is > the current status priority, change it.
894 * If status priority == new priority and both are zero, take
896 * If it is not zero, then we keep the first non-zero "error" that
899 if (priority > old_priority || (
900 priority == 0 && old_priority == 0)) {
901 Dmsg4(800, "Set new stat. old: %c,%d new: %c,%d\n",
902 jcr->JobStatus, old_priority, newJobStatus, priority);
903 jcr->JobStatus = newJobStatus; /* replace with new status */
906 if (oldJobStatus != jcr->JobStatus) {
907 Dmsg2(800, "leave set_job_status old=%c new=%c\n", oldJobStatus, newJobStatus);
908 // generate_plugin_event(jcr, bEventStatusChange, NULL);
912 #ifdef TRACE_JCR_CHAIN
913 static int lock_count = 0;
919 #ifdef TRACE_JCR_CHAIN
920 static void b_lock_jcr_chain(const char *fname, int line)
922 static void lock_jcr_chain()
925 #ifdef TRACE_JCR_CHAIN
926 Dmsg3(dbglvl, "Lock jcr chain %d from %s:%d\n", ++lock_count, fname, line);
934 #ifdef TRACE_JCR_CHAIN
935 static void b_unlock_jcr_chain(const char *fname, int line)
937 static void unlock_jcr_chain()
940 #ifdef TRACE_JCR_CHAIN
941 Dmsg3(dbglvl, "Unlock jcr chain %d from %s:%d\n", lock_count--, fname, line);
947 * Start walk of jcr chain
948 * The proper way to walk the jcr chain is:
955 * It is possible to leave out the endeach_jcr(jcr), but
956 * in that case, the last jcr referenced must be explicitly
962 JCR *jcr_walk_start()
966 jcr = (JCR *)jcrs->first();
968 jcr->inc_use_count();
969 if (jcr->JobId > 0) {
970 Dmsg3(dbglvl, "Inc walk_start jid=%u use_count=%d Job=%s\n",
971 jcr->JobId, jcr->use_count(), jcr->Job);
979 * Get next jcr from chain, and release current one
981 JCR *jcr_walk_next(JCR *prev_jcr)
986 jcr = (JCR *)jcrs->next(prev_jcr);
988 jcr->inc_use_count();
989 if (jcr->JobId > 0) {
990 Dmsg3(dbglvl, "Inc walk_next jid=%u use_count=%d Job=%s\n",
991 jcr->JobId, jcr->use_count(), jcr->Job);
1002 * Release last jcr referenced
1004 void jcr_walk_end(JCR *jcr)
1007 if (jcr->JobId > 0) {
1008 Dmsg3(dbglvl, "Free walk_end jid=%u use_count=%d Job=%s\n",
1009 jcr->JobId, jcr->use_count(), jcr->Job);
1016 * Return number of Jobs
1024 for (jcr = (JCR *)jcrs->first(); (jcr = (JCR *)jcrs->next(jcr)); ) {
1025 if (jcr->JobId > 0) {
1035 * Setup to call the timeout check routine every 30 seconds
1036 * This routine will check any timers that have been enabled.
1038 bool init_jcr_subsystem(void)
1040 watchdog_t *wd = new_watchdog();
1042 wd->one_shot = false;
1043 wd->interval = 30; /* FIXME: should be configurable somewhere, even
1044 if only with a #define */
1045 wd->callback = jcr_timeout_check;
1047 register_watchdog(wd);
1052 static void jcr_timeout_check(watchdog_t *self)
1058 Dmsg0(dbglvl, "Start JCR timeout checks\n");
1060 /* Walk through all JCRs checking if any one is
1061 * blocked for more than specified max time.
1064 Dmsg2(dbglvl, "jcr_timeout_check JobId=%u jcr=0x%x\n", jcr->JobId, jcr);
1065 if (jcr->JobId == 0) {
1068 bs = jcr->store_bsock;
1070 timer_start = bs->timer_start;
1071 if (timer_start && (watchdog_time - timer_start) > bs->timeout) {
1072 bs->timer_start = 0; /* turn off timer */
1073 bs->set_timed_out();
1074 Qmsg(jcr, M_ERROR, 0, _(
1075 "Watchdog sending kill after %d secs to thread stalled reading Storage daemon.\n"),
1076 watchdog_time - timer_start);
1077 jcr->my_thread_send_signal(TIMEOUT_SIGNAL);
1080 bs = jcr->file_bsock;
1082 timer_start = bs->timer_start;
1083 if (timer_start && (watchdog_time - timer_start) > bs->timeout) {
1084 bs->timer_start = 0; /* turn off timer */
1085 bs->set_timed_out();
1086 Qmsg(jcr, M_ERROR, 0, _(
1087 "Watchdog sending kill after %d secs to thread stalled reading File daemon.\n"),
1088 watchdog_time - timer_start);
1089 jcr->my_thread_send_signal(TIMEOUT_SIGNAL);
1092 bs = jcr->dir_bsock;
1094 timer_start = bs->timer_start;
1095 if (timer_start && (watchdog_time - timer_start) > bs->timeout) {
1096 bs->timer_start = 0; /* turn off timer */
1097 bs->set_timed_out();
1098 Qmsg(jcr, M_ERROR, 0, _(
1099 "Watchdog sending kill after %d secs to thread stalled reading Director.\n"),
1100 watchdog_time - timer_start);
1101 jcr->my_thread_send_signal(TIMEOUT_SIGNAL);
1107 Dmsg0(dbglvl, "Finished JCR timeout checks\n");
1111 * Return next JobId from comma separated list
1114 * 1 if next JobId returned
1115 * 0 if no more JobIds are in list
1116 * -1 there is an error
1118 int get_next_jobid_from_list(char **p, uint32_t *JobId)
1120 const int maxlen = 30;
1121 char jobid[maxlen+1];
1125 for (int i=0; i<maxlen; i++) {
1128 } else if (*q == ',') {
1135 if (jobid[0] == 0) {
1137 } else if (!is_a_number(jobid)) {
1138 return -1; /* error */
1141 *JobId = str_to_int64(jobid);
1146 * Timeout signal comes here
1148 extern "C" void timeout_handler(int sig)
1150 return; /* thus interrupting the function */
1153 /* Used to display specific daemon information after a fatal signal
1154 * (like B_DB in the director)
1156 #define MAX_DBG_HOOK 10
1157 static dbg_jcr_hook_t *dbg_jcr_hooks[MAX_DBG_HOOK];
1158 static int dbg_jcr_handler_count;
1160 void dbg_jcr_add_hook(dbg_jcr_hook_t *hook)
1162 ASSERT(dbg_jcr_handler_count < MAX_DBG_HOOK);
1163 dbg_jcr_hooks[dbg_jcr_handler_count++] = hook;
1169 * This function should be used ONLY after a fatal signal. We walk through the
1170 * JCR chain without taking any lock; Bacula should not be running.
1172 void dbg_print_jcr(FILE *fp)
1174 char buf1[128], buf2[128], buf3[128], buf4[128];
1179 fprintf(fp, "Attempt to dump current JCRs. njcrs=%d\n", jcrs->size());
1181 for (JCR *jcr = (JCR *)jcrs->first(); jcr ; jcr = (JCR *)jcrs->next(jcr)) {
1182 fprintf(fp, "threadid=%p JobId=%d JobStatus=%c jcr=%p name=%s\n",
1183 (void *)jcr->my_thread_id, (int)jcr->JobId, jcr->JobStatus, jcr, jcr->Job);
1184 fprintf(fp, "\tuse_count=%i\n", jcr->use_count());
1185 fprintf(fp, "\tJobType=%c JobLevel=%c\n",
1186 jcr->getJobType(), jcr->getJobLevel());
1187 bstrftime(buf1, sizeof(buf1), jcr->sched_time);
1188 bstrftime(buf2, sizeof(buf2), jcr->start_time);
1189 bstrftime(buf3, sizeof(buf3), jcr->end_time);
1190 bstrftime(buf4, sizeof(buf4), jcr->wait_time);
1191 fprintf(fp, "\tsched_time=%s start_time=%s\n\tend_time=%s wait_time=%s\n",
1192 buf1, buf2, buf3, buf4);
1193 fprintf(fp, "\tdb=%p db_batch=%p batch_started=%i\n",
1194 jcr->db, jcr->db_batch, jcr->batch_started);
1197 * Call all the jcr debug hooks
1199 for(int i=0; i < dbg_jcr_handler_count; i++) {
1200 dbg_jcr_hook_t *hook = dbg_jcr_hooks[i];