2 * Manipulation routines for Job Control Records
4 * Kern E. Sibbald, December 2000
8 * These routines are thread safe.
12 Copyright (C) 2000-2003 Kern Sibbald and John Walker
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of
17 the License, or (at your option) any later version.
19 This program is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
24 You should have received a copy of the GNU General Public
25 License along with this program; if not, write to the Free
26 Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
34 /* External variables we reference */
/* Time value that jcr_timeout_check() compares against each socket's timer_start. */
35 extern time_t watchdog_time;
37 /* Forward referenced functions */
/* Installed as the TIMEOUT_SIGNAL handler by new_jcr(). */
38 static void timeout_handler(int sig);
/* Set as the watchdog callback by init_jcr_subsystem(). */
39 static void jcr_timeout_check(watchdog_t *self);
41 struct s_last_job last_job; /* last job run by this daemon */
/* free_jcr() removes the first last_jobs entry once the list size exceeds this. */
43 #define MAX_LAST_JOBS 10
45 static JCR *jobs = NULL; /* pointer to JCR chain */
47 /* Mutex for locking various jcr chains while updating */
48 static pthread_mutex_t jcr_chain_mutex = PTHREAD_MUTEX_INITIALIZER;
/*
 * Set up the last-jobs bookkeeping: allocate the last_jobs dlist and zero
 * the last_job record.  Takes no arguments and returns nothing.
 */
50 void init_last_jobs_list()
/*
 * job_entry is never assigned; it is only handed to the dlist constructor
 * together with the address of its link member.
 * NOTE(review): presumably dlist uses the pair just to locate the link field
 * inside each entry -- confirm against the dlist implementation.
 */
52 struct s_last_job *job_entry;
53 last_jobs = new dlist(job_entry, &job_entry->link);
54 memset(&last_job, 0, sizeof(last_job));
/*
 * Tear down the last_jobs list.  Only the loop header is visible in this
 * listing: it visits entries by calling last_jobs->next() until that call
 * yields a null value.
 * NOTE(review): the loop body and any disposal of the list object itself are
 * not shown here -- confirm against the full source.
 */
57 void term_last_jobs_list()
59 for (void *je=NULL; (je=last_jobs->next(je)); ) {
/*
 * Lock the last-jobs list.  According to the comment below, the list has no
 * mutex of its own and relies on the JCR chain mutex.
 * NOTE(review): the locking statement itself is not visible in this listing.
 */
65 void lock_last_jobs_list()
67 /* Use jcr chain mutex */
/*
 * Unlock the last-jobs list.  According to the comment below, the list has
 * no mutex of its own and relies on the JCR chain mutex.
 * NOTE(review): the unlocking statement itself is not visible in this listing.
 */
71 void unlock_last_jobs_list()
73 /* Use jcr chain mutex */
78 * Create a Job Control Record and link it into JCR chain
79 * Returns newly allocated JCR
80 * Note, since each daemon has a different JCR, he passes
/*
 * Allocate and initialise a Job Control Record.
 *
 *   size            - number of bytes passed to malloc() for the record
 *   daemon_free_jcr - daemon-supplied release routine; stored in the record
 *                     and invoked later by free_jcr()/free_locked_jcr()
 *
 * Also installs timeout_handler() as the handler for TIMEOUT_SIGNAL.
 * NOTE(review): the declaration of jcr, any clearing of the new record, the
 * linking into the jobs chain and the return statement are not visible in
 * this listing -- confirm against the full source.
 */
83 JCR *new_jcr(int size, JCR_free_HANDLER *daemon_free_jcr)
86 struct sigaction sigtimer;
88 Dmsg0(200, "Enter new_jcr\n");
/* NOTE(review): no NULL check of the malloc() result is visible before jcr is dereferenced. */
89 jcr = (JCR *)malloc(size);
/* Remember the creating thread; jcr_timeout_check() targets it with pthread_kill(). */
91 jcr->my_thread_id = pthread_self();
92 jcr->sched_time = time(NULL);
93 jcr->daemon_free_jcr = daemon_free_jcr; /* plug daemon free routine */
95 pthread_mutex_init(&(jcr->mutex), NULL);
96 jcr->JobStatus = JS_Created; /* ready to run */
/* Pool buffers released again in free_common_jcr(). */
97 jcr->VolumeName = get_pool_memory(PM_FNAME);
98 jcr->VolumeName[0] = 0;
99 jcr->errmsg = get_pool_memory(PM_MESSAGE);
/* NOTE(review): unbounded strcpy; assumes jcr->Job holds at least 10 bytes -- confirm. */
101 strcpy(jcr->Job, "*Console*"); /* default */
/*
 * Install the timeout handler.  sa_flags is 0 (so SA_RESTART is not set) and
 * the filled sa_mask blocks all signals while the handler runs.
 */
103 sigtimer.sa_flags = 0;
104 sigtimer.sa_handler = timeout_handler;
105 sigfillset(&sigtimer.sa_mask);
106 sigaction(TIMEOUT_SIGNAL, &sigtimer, NULL);
121 * Remove a JCR from the chain
122 * NOTE! The chain must be locked prior to calling
/*
 * Unlink jcr from the doubly linked chain whose head is the file-scope
 * pointer jobs.  The record itself is not released by the visible lines.
 * NOTE(review): the conditions guarding the abort message, the else branch
 * and the update of next->prev are on lines omitted from this listing.
 */
125 static void remove_jcr(JCR *jcr)
127 Dmsg0(150, "Enter remove_jcr\n");
129 Emsg0(M_ABORT, 0, "NULL jcr.\n");
/* No predecessor means jcr is the head, so the head pointer moves to its successor. */
131 if (!jcr->prev) { /* if no prev */
132 jobs = jcr->next; /* set new head */
134 jcr->prev->next = jcr->next; /* update prev */
137 jcr->next->prev = jcr->prev;
139 Dmsg0(150, "Leave remove_jcr\n");
143 * Free stuff common to all JCRs. N.B. Be careful to include only
144 * generic stuff in the common part of the jcr.
/*
 * Release the resources held by the daemon-independent part of a JCR and
 * snapshot the job's statistics into the file-scope last_job record.
 * NOTE(review): the case labels selecting which job types are recorded, and
 * whatever follows the cache releases at the end, are not visible here.
 */
146 static void free_common_jcr(JCR *jcr)
148 /* Keep some statistics */
149 switch (jcr->JobType) {
/* Copy the identifying and summary fields of the finished job into last_job. */
155 last_job.JobType = jcr->JobType;
156 last_job.JobId = jcr->JobId;
157 last_job.VolSessionId = jcr->VolSessionId;
158 last_job.VolSessionTime = jcr->VolSessionTime;
159 bstrncpy(last_job.Job, jcr->Job, sizeof(last_job.Job));
160 last_job.JobFiles = jcr->JobFiles;
161 last_job.JobBytes = jcr->JobBytes;
162 last_job.JobStatus = jcr->JobStatus;
163 last_job.JobLevel = jcr->JobLevel;
164 last_job.start_time = jcr->start_time;
/* The end time is the moment of release, not a value stored in the JCR. */
165 last_job.end_time = time(NULL);
/* Counterpart of the pthread_mutex_init() call in new_jcr(). */
170 pthread_mutex_destroy(&jcr->mutex);
172 close_msg(jcr); /* close messages for this job */
174 /* do this after closing messages */
175 if (jcr->client_name) {
176 free_pool_memory(jcr->client_name);
177 jcr->client_name = NULL;
/* Released with plain free(), unlike the pool buffers around it. */
180 if (jcr->sd_auth_key) {
181 free(jcr->sd_auth_key);
182 jcr->sd_auth_key = NULL;
184 if (jcr->VolumeName) {
185 free_pool_memory(jcr->VolumeName);
186 jcr->VolumeName = NULL;
189 if (jcr->dir_bsock) {
190 bnet_close(jcr->dir_bsock);
191 jcr->dir_bsock = NULL;
/* NOTE(review): any NULL guard around this release is not visible in this listing. */
194 free_pool_memory(jcr->errmsg);
201 if (jcr->cached_path) {
202 free_pool_memory(jcr->cached_path);
203 jcr->cached_path = NULL;
206 free_getuser_cache();
207 free_getgroup_cache();
212 * Global routine to free a jcr
/*
 * Drop one reference to a JCR and tear it down when the count is no longer
 * positive.
 *
 * Two entry points are visible: b_free_jcr(file, line, jcr), whose trace
 * message includes the caller's file and line, and free_jcr(jcr).
 * NOTE(review): only one statement of b_free_jcr is shown; presumably the two
 * signatures share the body below under a conditional-compilation switch --
 * confirm against the full source.  Locking, the early return for a record
 * that is still in use, and the final release of the record are likewise on
 * lines omitted from this listing.
 */
215 void b_free_jcr(char *file, int line, JCR *jcr)
/* NOTE(review): 0x%x is given a pointer argument here and in the traces below -- check on LP64 targets. */
217 Dmsg3(200, "Enter free_jcr 0x%x from %s:%d\n", jcr, file, line);
221 void free_jcr(JCR *jcr)
224 Dmsg1(200, "Enter free_jcr 0x%x\n", jcr);
227 struct s_last_job *je;
230 jcr->use_count--; /* decrement use count */
231 Dmsg3(200, "Dec jcr 0x%x use_count=%d jobid=%d\n", jcr, jcr->use_count, jcr->JobId);
232 if (jcr->use_count > 0) { /* if in use */
234 Dmsg2(200, "jcr 0x%x use_count=%d\n", jcr, jcr->use_count);
239 Dmsg1(200, "End job=%d\n", jcr->JobId);
/* The daemon-specific release routine is optional, hence the NULL check. */
240 if (jcr->daemon_free_jcr) {
241 jcr->daemon_free_jcr(jcr); /* call daemon free routine */
/* Fills last_job as a side effect; the block below relies on that. */
244 free_common_jcr(jcr);
246 /* Keep list of last jobs, but not Console where JobId==0 */
247 if (last_job.JobId > 0) {
/* NOTE(review): no NULL check of the malloc() result is visible before the copy. */
248 je = (struct s_last_job *)malloc(sizeof(struct s_last_job));
249 memcpy((char *)je, (char *)&last_job, sizeof(last_job));
250 last_jobs->append(je);
/*
 * Cap the history at MAX_LAST_JOBS by removing the first entry.
 * NOTE(review): no free() of the removed entry is visible; if dlist remove()
 * does not release it, the entry leaks -- confirm.
 */
251 if (last_jobs->size() > MAX_LAST_JOBS) {
252 last_jobs->remove(last_jobs->first());
254 last_job.JobId = 0; /* zap last job */
256 close_msg(NULL); /* flush any daemon messages */
258 Dmsg0(200, "Exit free_jcr\n");
263 * Global routine to free a jcr
264 * JCR chain is already locked
/*
 * Variant of free_jcr() for callers that already hold the JCR chain lock:
 * drop one reference and, once the count is no longer positive, run the
 * daemon release routine followed by free_common_jcr().
 * NOTE(review): the early return for a record that is still in use and the
 * removal of the record from the chain are on lines omitted from this
 * listing -- confirm against the full source.
 */
266 void free_locked_jcr(JCR *jcr)
268 jcr->use_count--; /* decrement use count */
269 Dmsg2(200, "Dec jcr 0x%x use_count=%d\n", jcr, jcr->use_count);
270 if (jcr->use_count > 0) { /* if in use */
/*
 * NOTE(review): free_jcr() tests daemon_free_jcr for NULL before calling it;
 * no such test is visible here -- confirm whether a NULL callback can occur.
 */
274 jcr->daemon_free_jcr(jcr); /* call daemon free routine */
275 free_common_jcr(jcr);
282 * Given a JobId, find the JCR
283 * Returns: jcr on success
/*
 * Look up a JCR by job id.  With jcr_chain_mutex held, walks the chain from
 * jobs and stops at the first record whose JobId equals the argument.
 *
 *   JobId - job identifier to search for
 *
 * NOTE(review): the trace message indicates that use_count is incremented on
 * a match, but the increment, the unlock and the return statements are on
 * lines omitted from this listing.
 */
286 JCR *get_jcr_by_id(uint32_t JobId)
290 P(jcr_chain_mutex); /* lock chain */
291 for (jcr = jobs; jcr; jcr=jcr->next) {
292 if (jcr->JobId == JobId) {
296 Dmsg2(200, "Inc jcr 0x%x use_count=%d\n", jcr, jcr->use_count);
305 * Given a SessionId and SessionTime, find the JCR
306 * Returns: jcr on success
/*
 * Look up a JCR by volume session.  Walks the chain from jobs and stops at
 * the first record whose VolSessionId and VolSessionTime both equal the
 * arguments.
 *
 *   SessionId   - value compared with jcr->VolSessionId
 *   SessionTime - value compared with jcr->VolSessionTime
 *
 * NOTE(review): locking, the use_count increment hinted at by the trace
 * message, and the return statements are on lines omitted from this listing.
 */
309 JCR *get_jcr_by_session(uint32_t SessionId, uint32_t SessionTime)
314 for (jcr = jobs; jcr; jcr=jcr->next) {
315 if (jcr->VolSessionId == SessionId &&
316 jcr->VolSessionTime == SessionTime) {
320 Dmsg2(200, "Inc jcr 0x%x use_count=%d\n", jcr, jcr->use_count);
330 * Given a Job, find the JCR
331 * compares on the number of characters in Job
332 * thus allowing partial matches.
333 * Returns: jcr on success
/*
 * Look up a JCR by job-name prefix.  Walks the chain from jobs and stops at
 * the first record whose Job field matches the argument over the first len
 * characters (strncmp).
 *
 *   Job - name, or leading part of a name, to search for
 *
 * NOTE(review): the computation of len, any NULL check on Job, locking, the
 * use_count increment hinted at by the trace message, and the return
 * statements are on lines omitted from this listing.
 */
336 JCR *get_jcr_by_partial_name(char *Job)
346 for (jcr = jobs; jcr; jcr=jcr->next) {
347 if (strncmp(Job, jcr->Job, len) == 0) {
351 Dmsg2(200, "Inc jcr 0x%x use_count=%d\n", jcr, jcr->use_count);
361 * Given a Job, find the JCR
362 * requires an exact match of names.
363 * Returns: jcr on success
/*
 * Look up a JCR by exact job name.  Walks the chain from jobs and stops at
 * the first record whose Job field compares equal to the argument (strcmp).
 *
 *   Job - complete job name to search for
 *
 * NOTE(review): any NULL check on Job, locking, the use_count increment
 * hinted at by the trace message, and the return statements are on lines
 * omitted from this listing.
 */
366 JCR *get_jcr_by_full_name(char *Job)
374 for (jcr = jobs; jcr; jcr=jcr->next) {
375 if (strcmp(jcr->Job, Job) == 0) {
379 Dmsg2(200, "Inc jcr 0x%x use_count=%d\n", jcr, jcr->use_count);
/*
 * Store a new status in jcr->JobStatus unless the current status is one that
 * must be preserved.
 *
 *   jcr       - record to update
 *   JobStatus - status to store
 *
 * NOTE(review): JS_ErrorTerminated is the only preserved status visible in
 * this listing; further case labels and the default label may sit on
 * omitted lines.
 */
387 void set_jcr_job_status(JCR *jcr, int JobStatus)
390 * For a set of errors, ... keep the current status
391 * so it isn't lost. For all others, set it.
393 switch (jcr->JobStatus) {
394 case JS_ErrorTerminated:
401 jcr->JobStatus = JobStatus;
/*
 * Lock the JCR chain for callers outside this file.
 * NOTE(review): the body is not visible in this listing; presumably it
 * acquires jcr_chain_mutex -- confirm against the full source.
 */
408 void lock_jcr_chain()
/*
 * Unlock the JCR chain for callers outside this file.
 * NOTE(review): the body is not visible in this listing; presumably it
 * releases jcr_chain_mutex -- confirm against the full source.
 */
416 void unlock_jcr_chain()
/*
 * Chain iterator.  jcr_timeout_check() starts it with NULL, feeds each
 * result back in as the argument, and stops when a null value comes back.
 *
 *   jcr - record returned by the previous call, or NULL to start
 *
 * NOTE(review): only the trace message is visible; it indicates that the
 * use_count of the returned record (rjcr) is incremented.  The selection of
 * rjcr and the return statement are on lines omitted from this listing.
 */
422 JCR *get_next_jcr(JCR *jcr)
435 Dmsg1(200, "Inc jcr use_count=%d\n", rjcr->use_count);
/*
 * Create and register the watchdog that runs jcr_timeout_check().
 * NOTE(review): the return statement is not visible in this listing, so the
 * meaning of the bool result cannot be stated from here.
 */
440 bool init_jcr_subsystem(void)
442 watchdog_t *wd = watchdog_new();
/* Not one-shot; NOTE(review): the interval below is presumably in seconds -- confirm. */
444 wd->one_shot = false;
445 wd->interval = 30; /* FIXME: should be configurable somewhere, even
446 if only with a #define */
447 wd->callback = jcr_timeout_check;
449 register_watchdog(wd);
/*
 * Watchdog callback (set up in init_jcr_subsystem()).  For every JCR with a
 * non-zero JobId it inspects the job's sockets; when a socket's timer has
 * been running longer than its timeout, the timer is cleared, the socket is
 * flagged as timed out, an error is logged against the job, and
 * TIMEOUT_SIGNAL is sent to the thread recorded in jcr->my_thread_id.
 *
 *   self - the watchdog descriptor; not referenced by the visible lines
 *
 * NOTE(review): local declarations, chain locking and unlocking, the
 * statement taken when JobId is 0, and the NULL checks on each socket
 * pointer are on lines omitted from this listing.
 */
454 static void jcr_timeout_check(watchdog_t *self)
460 Dmsg0(200, "Start JCR timeout checks\n");
462 /* Walk through all JCRs checking if any one is
463 * blocked for more than specified max time.
466 for (jcr=NULL; (jcr=get_next_jcr(jcr)); ) {
467 free_locked_jcr(jcr); /* OK to free now cuz chain is locked */
/*
 * NOTE(review): jcr is still dereferenced below after free_locked_jcr().
 * That is only safe if the call merely drops the reference taken by
 * get_next_jcr() and the count cannot reach zero here -- confirm.
 */
468 if (jcr->JobId == 0) {
/* Check 1: connection stored in jcr->store_bsock (Storage daemon per the message). */
471 fd = jcr->store_bsock;
473 timer_start = fd->timer_start;
/* A timer_start of 0 means the timer is off, so only running timers are tested. */
474 if (timer_start && (watchdog_time - timer_start) > fd->timeout) {
475 fd->timer_start = 0; /* turn off timer */
476 fd->timed_out = TRUE;
/* NOTE(review): %d receives a time difference; check the argument type against the format. */
477 Jmsg(jcr, M_ERROR, 0, _(
478 "Watchdog sending kill after %d secs to thread stalled reading Storage daemon.\n"),
479 watchdog_time - timer_start);
480 pthread_kill(jcr->my_thread_id, TIMEOUT_SIGNAL);
/* Check 2: connection stored in jcr->file_bsock (File daemon per the message). */
483 fd = jcr->file_bsock;
485 timer_start = fd->timer_start;
486 if (timer_start && (watchdog_time - timer_start) > fd->timeout) {
487 fd->timer_start = 0; /* turn off timer */
488 fd->timed_out = TRUE;
489 Jmsg(jcr, M_ERROR, 0, _(
490 "Watchdog sending kill after %d secs to thread stalled reading File daemon.\n"),
491 watchdog_time - timer_start);
492 pthread_kill(jcr->my_thread_id, TIMEOUT_SIGNAL);
/*
 * Check 3: Director connection per the message.
 * NOTE(review): the assignment of fd for this check is not visible in this
 * listing -- confirm which socket is examined.
 */
497 timer_start = fd->timer_start;
498 if (timer_start && (watchdog_time - timer_start) > fd->timeout) {
499 fd->timer_start = 0; /* turn off timer */
500 fd->timed_out = TRUE;
501 Jmsg(jcr, M_ERROR, 0, _(
502 "Watchdog sending kill after %d secs to thread stalled reading Director.\n"),
503 watchdog_time - timer_start);
504 pthread_kill(jcr->my_thread_id, TIMEOUT_SIGNAL);
511 Dmsg0(200, "Finished JCR timeout checks\n");
515 * Timeout signal comes here
/*
 * Handler for TIMEOUT_SIGNAL, installed by new_jcr().  It does no work: sig
 * is not used and the handler returns at once.  Delivery of the signal,
 * sent with pthread_kill() from jcr_timeout_check(), is what interrupts the
 * target thread.
 *
 *   sig - signal number; ignored
 */
517 void timeout_handler(int sig)
519 return; /* thus interrupting the function */