2 Bacula(R) - The Network Backup Solution
4 Copyright (C) 2000-2017 Kern Sibbald
6 The original author of Bacula is Kern Sibbald, with contributions
7 from many others, a complete list can be found in the file AUTHORS.
9 You may use this file and others of this release according to the
10 license defined in the LICENSE file, which includes the Affero General
11 Public License, v3.0 ("AGPLv3") and some additional permissions and
12 terms pursuant to its AGPLv3 Section 7.
14 This notice must be preserved when any source code is
15 conveyed and/or propagated.
17 Bacula(R) is a registered trademark of Kern Sibbald.
20 * Manipulation routines for Job Control Records and
21 * handling of last_jobs_list.
23 * Kern E. Sibbald, December 2000
25 * These routines are thread safe.
27 * The job list routines were re-written in May 2005 to
28 * eliminate the global lock while traversing the list, and
29 * to use the dlist subroutines. The locking is now done
30 * on the list each time the list is modified or traversed.
31 * That is, it is "micro-locked" rather than globally locked.
32 * The result is that there is one lock/unlock for each entry
33 * in the list while traversing it rather than a single lock
34 * at the beginning of a traversal and one at the end. This
35 * incurs slightly more overhead, but effectively eliminates
36 * the possibility of race conditions. In addition, with the
37 * exception of the global locking of the list during the
38 * re-reading of the config file, no recursion is needed.
45 const int dbglvl = 3400;
47 /* External variables we reference */
49 /* External referenced functions */
50 void free_bregexps(alist *bregexps);
52 /* Forward referenced functions */
53 extern "C" void timeout_handler(int sig);
54 static void jcr_timeout_check(watchdog_t *self);
55 #ifdef TRACE_JCR_CHAIN
56 static void b_lock_jcr_chain(const char *filen, int line);
57 static void b_unlock_jcr_chain(const char *filen, int line);
58 #define lock_jcr_chain() b_lock_jcr_chain(__FILE__, __LINE__);
59 #define unlock_jcr_chain() b_unlock_jcr_chain(__FILE__, __LINE__);
61 static void lock_jcr_chain();
62 static void unlock_jcr_chain();
67 dlist *last_jobs = NULL;
68 const int max_last_jobs = 10;
70 static dlist *jcrs = NULL; /* JCR chain */
71 static pthread_mutex_t jcr_lock = PTHREAD_MUTEX_INITIALIZER;
73 static pthread_mutex_t job_start_mutex = PTHREAD_MUTEX_INITIALIZER;
75 static pthread_mutex_t last_jobs_mutex = PTHREAD_MUTEX_INITIALIZER;
77 static pthread_key_t jcr_key; /* Pointer to jcr for each thread */
79 pthread_once_t key_once = PTHREAD_ONCE_INIT;
81 static char Job_status[] = "Status JobId=%ld JobStatus=%d\n";
94 void init_last_jobs_list()
97 struct s_last_job *job_entry = NULL;
99 last_jobs = New(dlist(job_entry, &job_entry->link));
102 jcrs = New(dlist(jcr, &jcr->link));
106 void term_last_jobs_list()
109 lock_last_jobs_list();
110 while (!last_jobs->empty()) {
111 void *je = last_jobs->first();
112 last_jobs->remove(je);
117 unlock_last_jobs_list();
125 bool read_last_jobs_list(int fd, uint64_t addr)
127 struct s_last_job *je, job;
131 Dmsg1(100, "read_last_jobs seek to %d\n", (int)addr);
132 if (addr == 0 || lseek(fd, (boffset_t)addr, SEEK_SET) < 0) {
135 if (read(fd, &num, sizeof(num)) != sizeof(num)) {
138 Dmsg1(100, "Read num_items=%d\n", num);
139 if (num > 4 * max_last_jobs) { /* sanity check */
142 lock_last_jobs_list();
143 for ( ; num; num--) {
144 if (read(fd, &job, sizeof(job)) != sizeof(job)) {
146 Pmsg1(000, "Read job entry. ERR=%s\n", be.bstrerror());
151 je = (struct s_last_job *)malloc(sizeof(struct s_last_job));
152 memcpy((char *)je, (char *)&job, sizeof(job));
154 init_last_jobs_list();
156 last_jobs->append(je);
157 if (last_jobs->size() > max_last_jobs) {
158 je = (struct s_last_job *)last_jobs->first();
159 last_jobs->remove(je);
164 unlock_last_jobs_list();
168 uint64_t write_last_jobs_list(int fd, uint64_t addr)
170 struct s_last_job *je;
174 Dmsg1(100, "write_last_jobs seek to %d\n", (int)addr);
175 if (lseek(fd, (boffset_t)addr, SEEK_SET) < 0) {
179 lock_last_jobs_list();
180 /* First record is number of entries */
181 num = last_jobs->size();
182 if (write(fd, &num, sizeof(num)) != sizeof(num)) {
184 Pmsg1(000, "Error writing num_items: ERR=%s\n", be.bstrerror());
187 foreach_dlist(je, last_jobs) {
188 if (write(fd, je, sizeof(struct s_last_job)) != sizeof(struct s_last_job)) {
190 Pmsg1(000, "Error writing job: ERR=%s\n", be.bstrerror());
194 unlock_last_jobs_list();
196 /* Return current address */
197 stat = lseek(fd, 0, SEEK_CUR);
204 unlock_last_jobs_list();
208 void lock_last_jobs_list()
213 void unlock_last_jobs_list()
218 /* Get an ASCII representation of the Operation being performed as an English noun */
219 const char *JCR::get_OperationName()
225 return _("Verifying");
227 return _("Restoring");
229 return _("Archiving");
233 return _("Migration");
235 return _("Scanning");
237 return _("Unknown operation");
241 /* Get an ASCII representation of the Action being performed, either an English verb or adjective */
242 const char *JCR::get_ActionName(bool past)
248 return (past == true) ? _("verified") : _("verify");
250 return (past == true) ? _("restored") : _("restore");
252 return (past == true) ? _("archived") : _("archive");
254 return (past == true) ? _("copied") : _("copy");
256 return (past == true) ? _("migrated") : _("migrate");
258 return (past == true) ? _("scanned") : _("scan");
260 return _("unknown action");
273 if (m_JobLevel == L_VIRTUAL_FULL) {
283 /* We can stop only Backup jobs connected to a client. It doesn't make sense at
284 * this time to stop a copy, migration, restore or a verify job. The specific
285 * code should be implemented first.
287 bool JCR::can_be_stopped()
290 if (getJobType() == JT_BACKUP) { /* Is a Backup */
291 if (getJobLevel() == L_VIRTUAL_FULL) { /* Is a VirtualFull */
294 } else { /* Is not a backup (so, copy, migration, admin, verify, ... */
301 * Push a subroutine address into the job end callback stack
303 void job_end_push(JCR *jcr, void job_end_cb(JCR *jcr,void *), void *ctx)
305 jcr->job_end_push.append((void *)job_end_cb);
306 jcr->job_end_push.append(ctx);
309 /* DELETE ME when bugs in MA1512, MA1632 MA1639 are fixed */
310 void (*MA1512_reload_job_end_cb)(JCR *,void *) = NULL;
312 /* Pop each job_end subroutine and call it */
313 static void job_end_pop(JCR *jcr)
315 void (*job_end_cb)(JCR *jcr, void *ctx);
317 for (int i=jcr->job_end_push.size()-1; i > 0; ) {
318 ctx = jcr->job_end_push.get(i--);
319 job_end_cb = (void (*)(JCR *,void *))jcr->job_end_push.get(i--);
320 /* check for bug MA1512, MA1632 MA1639,
321 * today, job_end_cb can only be reload_job_end_cb() from DIR */
322 if (job_end_cb != MA1512_reload_job_end_cb && MA1512_reload_job_end_cb != NULL) {
323 Tmsg2(0, "Bug 'job_end_pop' detected, skip ! job_end_cb=0x%p ctx=0x%p\n", job_end_cb, ctx);
324 Tmsg0(0, "Display job_end_push list\n");
325 for (int j=jcr->job_end_push.size()-1; j > 0; ) {
326 void *ctx2 = jcr->job_end_push.get(j--);
327 void *job_end_cb2 = jcr->job_end_push.get(j--);
328 Tmsg3(0, "Bug 'job_end_pop' entry[%d] job_end_cb=0x%p ctx=0x%p\n", j+1, job_end_cb2, ctx2);
332 job_end_cb(jcr, ctx);
338 * Create thread key for thread specific data
340 void create_jcr_key()
342 int status = pthread_key_create(&jcr_key, NULL);
345 Jmsg1(NULL, M_ABORT, 0, _("pthread key create failed: ERR=%s\n"),
346 be.bstrerror(status));
351 * Create a Job Control Record and link it into JCR chain
352 * Returns newly allocated JCR
353 * Note, since each daemon has a different JCR, he passes
356 JCR *new_jcr(int size, JCR_free_HANDLER *daemon_free_jcr)
359 MQUEUE_ITEM *item = NULL;
362 Dmsg0(dbglvl, "Enter new_jcr\n");
363 status = pthread_once(&key_once, create_jcr_key);
366 Jmsg1(NULL, M_ABORT, 0, _("pthread_once failed. ERR=%s\n"), be.bstrerror(status));
368 jcr = (JCR *)malloc(size);
370 /* Note for the director, this value is changed in jobq.c */
371 jcr->my_thread_id = pthread_self();
372 jcr->msg_queue = New(dlist(item, &item->link));
373 if ((status = pthread_mutex_init(&jcr->msg_queue_mutex, NULL)) != 0) {
375 Jmsg(NULL, M_ABORT, 0, _("Could not init msg_queue mutex. ERR=%s\n"),
376 be.bstrerror(status));
378 jcr->job_end_push.init(1, false);
379 jcr->sched_time = time(NULL);
380 jcr->initial_sched_time = jcr->sched_time;
381 jcr->daemon_free_jcr = daemon_free_jcr; /* plug daemon free routine */
383 jcr->inc_use_count();
384 jcr->VolumeName = get_pool_memory(PM_FNAME);
385 jcr->VolumeName[0] = 0;
386 jcr->errmsg = get_pool_memory(PM_MESSAGE);
388 jcr->comment = get_pool_memory(PM_FNAME);
390 jcr->StatusErrMsg = get_pool_memory(PM_FNAME);
391 jcr->StatusErrMsg[0] = 0;
393 /* Setup some dummy values */
394 bstrncpy(jcr->Job, "*System*", sizeof(jcr->Job));
396 jcr->setJobType(JT_SYSTEM); /* internal job until defined */
397 jcr->setJobLevel(L_NONE);
398 jcr->setJobStatus(JS_Created); /* ready to run */
400 struct sigaction sigtimer;
401 sigtimer.sa_flags = 0;
402 sigtimer.sa_handler = timeout_handler;
403 sigfillset(&sigtimer.sa_mask);
404 sigaction(TIMEOUT_SIGNAL, &sigtimer, NULL);
408 * Locking jobs is a global lock that is needed
409 * so that the Director can stop new jobs from being
410 * added to the jcr chain while it processes a new
411 * conf file and does the job_end_push().
416 jcrs = New(dlist(jcr, &jcr->link));
427 * Remove a JCR from the chain
428 * NOTE! The chain must be locked prior to calling
431 static void remove_jcr(JCR *jcr)
433 Dmsg0(dbglvl, "Enter remove_jcr\n");
435 Emsg0(M_ABORT, 0, _("NULL jcr.\n"));
438 Dmsg0(dbglvl, "Leave remove_jcr\n");
442 * Free stuff common to all JCRs. N.B. Be careful to include only
443 * generic stuff in the common part of the jcr.
445 static void free_common_jcr(JCR *jcr)
447 /* Uses jcr lock/unlock */
448 remove_jcr_from_tsd(jcr);
449 jcr->set_killable(false);
451 jcr->destroy_mutex();
453 if (jcr->msg_queue) {
454 delete jcr->msg_queue;
455 jcr->msg_queue = NULL;
456 pthread_mutex_destroy(&jcr->msg_queue_mutex);
459 /* do this after closing messages */
460 free_and_null_pool_memory(jcr->JobIds);
461 free_and_null_pool_memory(jcr->client_name);
462 free_and_null_pool_memory(jcr->attr);
463 free_and_null_pool_memory(jcr->VolumeName);
464 free_and_null_pool_memory(jcr->errmsg);
465 free_and_null_pool_memory(jcr->StatusErrMsg);
467 if (jcr->sd_auth_key) {
468 free(jcr->sd_auth_key);
469 jcr->sd_auth_key = NULL;
472 free_bsock(jcr->dir_bsock);
478 if (jcr->RegexWhere) {
479 free(jcr->RegexWhere);
480 jcr->RegexWhere = NULL;
482 if (jcr->where_bregexp) {
483 free_bregexps(jcr->where_bregexp);
484 delete jcr->where_bregexp;
485 jcr->where_bregexp = NULL;
487 if (jcr->cached_path) {
488 free_pool_memory(jcr->cached_path);
489 jcr->cached_path = NULL;
493 free_guid_list(jcr->id_list);
497 free_pool_memory(jcr->comment);
504 * Global routine to free a jcr
507 void b_free_jcr(const char *file, int line, JCR *jcr)
509 struct s_last_job *je;
511 Dmsg3(dbglvl, "Enter free_jcr jid=%u from %s:%d\n", jcr->JobId, file, line);
515 void free_jcr(JCR *jcr)
517 struct s_last_job *je;
519 Dmsg3(dbglvl, "Enter free_jcr jid=%u use_count=%d Job=%s\n",
520 jcr->JobId, jcr->use_count(), jcr->Job);
525 jcr->dec_use_count(); /* decrement use count */
526 ASSERT2(jcr->use_count() >= 0, "JCR use_count < 0");
527 // Jmsg2(jcr, M_ERROR, 0, _("JCR use_count=%d JobId=%d\n"),
528 // jcr->use_count(), jcr->JobId);
530 if (jcr->JobId > 0) {
531 Dmsg3(dbglvl, "Dec free_jcr jid=%u use_count=%d Job=%s\n",
532 jcr->JobId, jcr->use_count(), jcr->Job);
534 if (jcr->use_count() > 0) { /* if in use */
538 if (jcr->JobId > 0) {
539 Dmsg3(dbglvl, "remove jcr jid=%u use_count=%d Job=%s\n",
540 jcr->JobId, jcr->use_count(), jcr->Job);
543 remove_jcr(jcr); /* remove Jcr from chain */
546 dequeue_messages(jcr);
547 close_msg(jcr); /* close messages for this job */
548 job_end_pop(jcr); /* pop and call hooked routines */
550 Dmsg1(dbglvl, "End job=%d\n", jcr->JobId);
552 /* Keep some statistics */
553 switch (jcr->getJobType()) {
560 /* Keep list of last jobs, but not Console where JobId==0 */
561 if (jcr->JobId > 0) {
562 lock_last_jobs_list();
564 je = (struct s_last_job *)malloc(sizeof(struct s_last_job));
565 memset(je, 0, sizeof(struct s_last_job)); /* zero in case unset fields */
566 je->Errors = jcr->JobErrors;
567 je->JobType = jcr->getJobType();
568 je->JobId = jcr->JobId;
569 je->VolSessionId = jcr->VolSessionId;
570 je->VolSessionTime = jcr->VolSessionTime;
571 bstrncpy(je->Job, jcr->Job, sizeof(je->Job));
572 je->JobFiles = jcr->JobFiles;
573 je->JobBytes = jcr->JobBytes;
574 je->JobStatus = jcr->JobStatus;
575 je->JobLevel = jcr->getJobLevel();
576 je->start_time = jcr->start_time;
577 je->end_time = time(NULL);
580 init_last_jobs_list();
582 last_jobs->append(je);
583 if (last_jobs->size() > max_last_jobs) {
584 je = (struct s_last_job *)last_jobs->first();
585 last_jobs->remove(je);
588 unlock_last_jobs_list();
595 if (jcr->daemon_free_jcr) {
596 jcr->daemon_free_jcr(jcr); /* call daemon free routine */
599 free_common_jcr(jcr);
600 close_msg(NULL); /* flush any daemon messages */
601 Dmsg0(dbglvl, "Exit free_jcr\n");
605 * Remove jcr from thread specific data, but
606 * make sure it is us who are attached.
608 void remove_jcr_from_tsd(JCR *jcr)
610 JCR *tjcr = get_jcr_from_tsd();
612 set_jcr_in_tsd(INVALID_JCR);
616 void JCR::set_killable(bool killable)
619 my_thread_killable = killable;
624 * Put this jcr in the thread specific data
625 * if update_thread_info is true and the jcr is valid,
626 * we update the my_thread_id in the JCR
628 void set_jcr_in_tsd(JCR *jcr)
630 int status = pthread_setspecific(jcr_key, (void *)jcr);
633 Jmsg1(jcr, M_ABORT, 0, _("pthread_setspecific failed: ERR=%s\n"),
634 be.bstrerror(status));
638 void JCR::my_thread_send_signal(int sig)
640 lock_jcr_chain(); /* use global lock */
645 if (this->is_killable() &&
646 !pthread_equal(this->my_thread_id, pthread_self()))
648 Dmsg1(800, "Send kill to jid=%d\n", this->JobId);
649 pthread_kill(this->my_thread_id, sig);
650 this->exiting = true;
652 } else if (!this->is_killable()) {
653 Dmsg1(10, "Warning, cannot send kill to jid=%d marked not killable.\n", this->JobId);
661 * Give me the jcr that is attached to this thread
663 JCR *get_jcr_from_tsd()
665 JCR *jcr = (JCR *)pthread_getspecific(jcr_key);
666 // printf("get_jcr_from_tsd: jcr=%p\n", jcr);
667 /* set any INVALID_JCR to NULL which the rest of Bacula understands */
668 if (jcr == INVALID_JCR) {
676 * Find which JobId corresponds to the current thread
678 uint32_t get_jobid_from_tsd()
682 jcr = get_jcr_from_tsd();
683 // printf("get_jobid_from_tsr: jcr=%p\n", jcr);
685 JobId = (uint32_t)jcr->JobId;
691 * Given a JobId, find the JCR
692 * Returns: jcr on success
695 JCR *get_jcr_by_id(uint32_t JobId)
700 if (jcr->JobId == JobId) {
701 jcr->inc_use_count();
702 Dmsg3(dbglvl, "Inc get_jcr jid=%u use_count=%d Job=%s\n",
703 jcr->JobId, jcr->use_count(), jcr->Job);
712 * Given a thread id, find the JobId
713 * Returns: JobId on success
716 uint32_t get_jobid_from_tid(pthread_t tid)
722 if (pthread_equal(jcr->my_thread_id, tid)) {
736 * Given a SessionId and SessionTime, find the JCR
737 * Returns: jcr on success
740 JCR *get_jcr_by_session(uint32_t SessionId, uint32_t SessionTime)
745 if (jcr->VolSessionId == SessionId &&
746 jcr->VolSessionTime == SessionTime) {
747 jcr->inc_use_count();
748 Dmsg3(dbglvl, "Inc get_jcr jid=%u use_count=%d Job=%s\n",
749 jcr->JobId, jcr->use_count(), jcr->Job);
759 * Given a Job, find the JCR
760 * compares on the number of characters in Job
761 * thus allowing partial matches.
762 * Returns: jcr on success
765 JCR *get_jcr_by_partial_name(char *Job)
775 if (strncmp(Job, jcr->Job, len) == 0) {
776 jcr->inc_use_count();
777 Dmsg3(dbglvl, "Inc get_jcr jid=%u use_count=%d Job=%s\n",
778 jcr->JobId, jcr->use_count(), jcr->Job);
788 * Given a Job, find the JCR
789 * requires an exact match of names.
790 * Returns: jcr on success
793 JCR *get_jcr_by_full_name(char *Job)
801 if (strcmp(jcr->Job, Job) == 0) {
802 jcr->inc_use_count();
803 Dmsg3(dbglvl, "Inc get_jcr jid=%u use_count=%d Job=%s\n",
804 jcr->JobId, jcr->use_count(), jcr->Job);
812 static void update_wait_time(JCR *jcr, int newJobStatus)
814 bool enter_in_waittime;
815 int oldJobStatus = jcr->JobStatus;
817 switch (newJobStatus) {
822 case JS_WaitStoreRes:
824 case JS_WaitClientRes:
826 case JS_WaitPriority:
827 enter_in_waittime = true;
830 enter_in_waittime = false; /* not a Wait situation */
835 * If we were previously waiting and are not any more,
836 * add the elapsed time to wait_time_sum; wait_time is
837 * the start of waiting.
839 switch (oldJobStatus) {
844 case JS_WaitStoreRes:
846 case JS_WaitClientRes:
848 case JS_WaitPriority:
849 if (!enter_in_waittime) { /* we get out the wait time */
850 jcr->wait_time_sum += (time(NULL) - jcr->wait_time);
855 /* if wait state is new, we keep current time for watchdog MaxWaitTime */
857 if (enter_in_waittime) {
858 jcr->wait_time = time(NULL);
865 * Priority runs from 0 (lowest) to 10 (highest)
867 static int get_status_priority(int JobStatus)
874 case JS_ErrorTerminated:
890 * Send Job status to Director
892 bool JCR::sendJobStatus()
895 return dir_bsock->fsend(Job_status, JobId, JobStatus);
901 * Set and send Job status to Director
903 bool JCR::sendJobStatus(int aJobStatus)
905 if (!is_JobStatus(aJobStatus)) {
906 setJobStatus(aJobStatus);
908 return dir_bsock->fsend(Job_status, JobId, JobStatus);
914 void JCR::setJobStarted()
917 job_started_time = time(NULL);
920 static pthread_mutex_t status_lock = PTHREAD_MUTEX_INITIALIZER;
922 void JCR::setJobStatus(int newJobStatus)
924 int priority, old_priority;
925 int oldJobStatus = JobStatus;
928 priority = get_status_priority(newJobStatus);
929 old_priority = get_status_priority(oldJobStatus);
931 Dmsg2(800, "set_jcr_job_status(%ld, %c)\n", JobId, newJobStatus);
933 /* Update wait_time depending on newJobStatus and oldJobStatus */
934 update_wait_time(this, newJobStatus);
937 * For a set of errors, ... keep the current status
938 * so it isn't lost. For all others, set it.
940 Dmsg2(800, "OnEntry JobStatus=%c newJobstatus=%c\n", (oldJobStatus==0)?'0':oldJobStatus, newJobStatus);
942 * If the proposed new status has a higher priority than the current one, change it.
943 * If status priority == new priority and both are zero, take
945 * If it is not zero, then we keep the first non-zero "error" that
948 if (priority > old_priority || (
949 priority == 0 && old_priority == 0)) {
950 Dmsg4(800, "Set new stat. old: %c,%d new: %c,%d\n",
951 (oldJobStatus==0)?'0':oldJobStatus, old_priority, newJobStatus, priority);
952 JobStatus = newJobStatus; /* replace with new status */
955 if (oldJobStatus != JobStatus) {
956 Dmsg2(800, "leave setJobStatus old=%c new=%c\n", (oldJobStatus==0)?'0':oldJobStatus, newJobStatus);
957 // generate_plugin_event(this, bEventStatusChange, NULL);
962 #ifdef TRACE_JCR_CHAIN
963 static int lock_count = 0;
969 #ifdef TRACE_JCR_CHAIN
970 static void b_lock_jcr_chain(const char *fname, int line)
972 static void lock_jcr_chain()
975 #ifdef TRACE_JCR_CHAIN
976 Dmsg3(dbglvl, "Lock jcr chain %d from %s:%d\n", ++lock_count, fname, line);
984 #ifdef TRACE_JCR_CHAIN
985 static void b_unlock_jcr_chain(const char *fname, int line)
987 static void unlock_jcr_chain()
990 #ifdef TRACE_JCR_CHAIN
991 Dmsg3(dbglvl, "Unlock jcr chain %d from %s:%d\n", lock_count--, fname, line);
997 * Start walk of jcr chain
998 * The proper way to walk the jcr chain is:
1000 * foreach_jcr(jcr) {
1005 * It is possible to leave out the endeach_jcr(jcr), but
1006 * in that case, the last jcr referenced must be explicitly
1012 JCR *jcr_walk_start()
1016 jcr = (JCR *)jcrs->first();
1018 jcr->inc_use_count();
1019 if (jcr->JobId > 0) {
1020 Dmsg3(dbglvl, "Inc walk_start jid=%u use_count=%d Job=%s\n",
1021 jcr->JobId, jcr->use_count(), jcr->Job);
1029 * Get next jcr from chain, and release current one
1031 JCR *jcr_walk_next(JCR *prev_jcr)
1036 jcr = (JCR *)jcrs->next(prev_jcr);
1038 jcr->inc_use_count();
1039 if (jcr->JobId > 0) {
1040 Dmsg3(dbglvl, "Inc walk_next jid=%u use_count=%d Job=%s\n",
1041 jcr->JobId, jcr->use_count(), jcr->Job);
1052 * Release last jcr referenced
1054 void jcr_walk_end(JCR *jcr)
1057 if (jcr->JobId > 0) {
1058 Dmsg3(dbglvl, "Free walk_end jid=%u use_count=%d Job=%s\n",
1059 jcr->JobId, jcr->use_count(), jcr->Job);
1066 * Return number of Jobs
1074 for (jcr = (JCR *)jcrs->first(); (jcr = (JCR *)jcrs->next(jcr)); ) {
1075 if (jcr->JobId > 0) {
1085 * Setup to call the timeout check routine every 30 seconds
1086 * This routine will check any timers that have been enabled.
1088 bool init_jcr_subsystem(void)
1090 watchdog_t *wd = new_watchdog();
1092 wd->one_shot = false;
1093 wd->interval = 30; /* FIXME: should be configurable somewhere, even
1094 if only with a #define */
1095 wd->callback = jcr_timeout_check;
1097 register_watchdog(wd);
1102 static void jcr_timeout_check(watchdog_t *self)
1108 Dmsg0(dbglvl, "Start JCR timeout checks\n");
1110 /* Walk through all JCRs checking if any one is
1111 * blocked for more than specified max time.
1114 Dmsg2(dbglvl, "jcr_timeout_check JobId=%u jcr=0x%x\n", jcr->JobId, jcr);
1115 if (jcr->JobId == 0) {
1118 bs = jcr->store_bsock;
1120 timer_start = bs->timer_start;
1121 if (timer_start && (watchdog_time - timer_start) > bs->timeout) {
1122 bs->timer_start = 0; /* turn off timer */
1123 bs->set_timed_out();
1124 Qmsg(jcr, M_ERROR, 0, _(
1125 "Watchdog sending kill after %d secs to thread stalled reading Storage daemon.\n"),
1126 (int)(watchdog_time - timer_start));
1127 jcr->my_thread_send_signal(TIMEOUT_SIGNAL);
1130 bs = jcr->file_bsock;
1132 timer_start = bs->timer_start;
1133 if (timer_start && (watchdog_time - timer_start) > bs->timeout) {
1134 bs->timer_start = 0; /* turn off timer */
1135 bs->set_timed_out();
1136 Qmsg(jcr, M_ERROR, 0, _(
1137 "Watchdog sending kill after %d secs to thread stalled reading File daemon.\n"),
1138 (int)(watchdog_time - timer_start));
1139 jcr->my_thread_send_signal(TIMEOUT_SIGNAL);
1142 bs = jcr->dir_bsock;
1144 timer_start = bs->timer_start;
1145 if (timer_start && (watchdog_time - timer_start) > bs->timeout) {
1146 bs->timer_start = 0; /* turn off timer */
1147 bs->set_timed_out();
1148 Qmsg(jcr, M_ERROR, 0, _(
1149 "Watchdog sending kill after %d secs to thread stalled reading Director.\n"),
1150 (int)(watchdog_time - timer_start));
1151 jcr->my_thread_send_signal(TIMEOUT_SIGNAL);
1157 Dmsg0(dbglvl, "Finished JCR timeout checks\n");
1161 * Return next JobId from comma separated list
1164 * 1 if next JobId returned
1165 * 0 if no more JobIds are in list
1166 * -1 there is an error
1168 int get_next_jobid_from_list(char **p, uint32_t *JobId)
1170 const int maxlen = 30;
1171 char jobid[maxlen+1];
1175 for (int i=0; i<maxlen; i++) {
1178 } else if (*q == ',') {
1185 if (jobid[0] == 0) {
1187 } else if (!is_a_number(jobid)) {
1188 return -1; /* error */
1191 *JobId = str_to_int64(jobid);
1196 * Timeout signal comes here
1198 extern "C" void timeout_handler(int sig)
1200 return; /* thus interrupting the function */
1203 /* Used to display specific daemon information after a fatal signal
1204 * (like BDB in the director)
1206 #define MAX_DBG_HOOK 10
1207 static dbg_jcr_hook_t *dbg_jcr_hooks[MAX_DBG_HOOK];
1208 static int dbg_jcr_handler_count=0;
1210 void dbg_jcr_add_hook(dbg_jcr_hook_t *hook)
1212 ASSERT(dbg_jcr_handler_count < MAX_DBG_HOOK);
1213 dbg_jcr_hooks[dbg_jcr_handler_count++] = hook;
1216 /* on win32, the pthread_t is a struct, so we don't display it */
1217 #ifdef HAVE_MINGW_W64
1218 # define get_threadid(a) (void *)0
1220 # define get_threadid(a) (void *)(a)
1225 * This function should be used ONLY after a fatal signal. We walk through the
1226 * JCR chain without taking any lock; Bacula should not be running.
1228 void dbg_print_jcr(FILE *fp)
1230 char buf1[128], buf2[128], buf3[128], buf4[128];
1236 fprintf(fp, "Attempt to dump current JCRs. njcrs=%d\n", jcrs->size());
1238 for (JCR *jcr = (JCR *)jcrs->first(); jcr ; jcr = (JCR *)jcrs->next(jcr)) {
1239 fprintf(fp, "threadid=%p JobId=%d JobStatus=%c jcr=%p name=%s\n",
1240 get_threadid(jcr->my_thread_id), (int)jcr->JobId, jcr->JobStatus, jcr, jcr->Job);
1241 fprintf(fp, "\tuse_count=%i killable=%d\n",
1242 jcr->use_count(), jcr->is_killable());
1243 fprintf(fp, "\tJobType=%c JobLevel=%c\n",
1244 jcr->getJobType(), jcr->getJobLevel());
1245 bstrftime(buf1, sizeof(buf1), jcr->sched_time);
1246 bstrftime(buf2, sizeof(buf2), jcr->start_time);
1247 bstrftime(buf3, sizeof(buf3), jcr->end_time);
1248 bstrftime(buf4, sizeof(buf4), jcr->wait_time);
1249 fprintf(fp, "\tsched_time=%s start_time=%s\n\tend_time=%s wait_time=%s\n",
1250 buf1, buf2, buf3, buf4);
1251 fprintf(fp, "\tdb=%p db_batch=%p batch_started=%i\n",
1252 jcr->db, jcr->db_batch, jcr->batch_started);
1255 * Call all the jcr debug hooks
1257 for(int i=0; i < dbg_jcr_handler_count; i++) {
1258 dbg_jcr_hook_t *hook = dbg_jcr_hooks[i];