3 * Bacula Director Job processing routines
5 * Kern Sibbald, October MM
10 Copyright (C) 2000-2003 Kern Sibbald and John Walker
12 This program is free software; you can redistribute it and/or
13 modify it under the terms of the GNU General Public License as
14 published by the Free Software Foundation; either version 2 of
15 the License, or (at your option) any later version.
17 This program is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 General Public License for more details.
22 You should have received a copy of the GNU General Public
23 License along with this program; if not, write to the Free
24 Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
/* NOTE(review): every line of this listing starts with the line number it
 * had in the original file, and the gaps in that numbering show that some
 * original lines are not visible here. */
32 /* Forward referenced subroutines */
/* Thread entry point; handed to workq_init() and pthread_create() below */
33 static void *job_thread(void *arg);
34 static char *edit_run_codes(JCR *jcr, char *omsg, char *imsg);
35 static void release_resource_locks(JCR *jcr);
36 static int acquire_resource_locks(JCR *jcr);
38 static void backoff_resource_locks(JCR *jcr, int count);
41 /* Exported subroutines */
42 void run_job(JCR *jcr);
45 /* Imported subroutines */
46 extern void term_scheduler();
47 extern void term_ua_server();
48 extern int do_backup(JCR *jcr);
49 extern int do_restore(JCR *jcr);
50 extern int do_verify(JCR *jcr);
51 extern void backup_cleanup(void);
/* Semaphore initialised with max_workers in init_job_server(); it is the
 * last lock tried in acquire_resource_locks() */
54 static semlock_t job_lock;
/* Mutex passed to pthread_cond_wait() together with resource_wait.
 * create_unique_job_name() declares its own function-local static that is
 * also called mutex. */
55 static pthread_mutex_t mutex;
/* Waited on in acquire_resource_locks(), signalled in release_resource_locks() */
56 static pthread_cond_t resource_wait;
58 /* Queue of jobs to be run */
59 workq_t job_wq; /* our job work queue */
/*
 * Initialise the objects used to schedule jobs: the job_lock semaphore,
 * the resource mutex, the resource_wait condition variable and the job_wq
 * work queue (whose worker routine is job_thread).
 *
 *   max_workers - count passed to both sem_init() and workq_init()
 *
 * Each failed initialisation is reported through Emsg1() with M_ABORT.
 *
 * NOTE(review): the declaration of stat, the closing braces and the lines
 * between the condition-variable init and the work-queue init are not
 * visible in this listing, so it cannot be confirmed from here whether all
 * four initialisations run in the same build.
 */
62 void init_job_server(int max_workers)
66 if ((stat = sem_init(&job_lock, max_workers)) != 0) {
67 Emsg1(M_ABORT, 0, _("Could not init job lock: ERR=%s\n"), strerror(stat));
69 if ((stat = pthread_mutex_init(&mutex, NULL)) != 0) {
70 Emsg1(M_ABORT, 0, _("Could not init resource mutex: ERR=%s\n"), strerror(stat));
72 if ((stat = pthread_cond_init(&resource_wait, NULL)) != 0) {
73 Emsg1(M_ABORT, 0, _("Could not init resource wait: ERR=%s\n"), strerror(stat));
77 if ((stat = workq_init(&job_wq, max_workers, job_thread)) != 0) {
78 Emsg1(M_ABORT, 0, _("Could not init job work queue: ERR=%s\n"), strerror(stat));
85 * Run a job -- typically called by the scheduler, but may also
86 * be called by the UA (Console program).
/*
 * Prepare a job for execution and hand it to a job thread.
 *
 * Steps visible here: set up message handling, build the unique job name,
 * fill in the catalog job record (jcr->jr) from the JCR, initialise the
 * job's termination condition variable, open the catalog database, create
 * the job record in it, then start/queue the job.
 *
 *   jcr - job control record; jcr->job, jcr->catalog and jcr->messages are
 *         dereferenced, so they must already be set
 *
 * NOTE(review): the local declarations (stat, errstat, tid) and whatever
 * follows each set_jcr_job_status(JS_ErrorTerminated) call are not visible
 * in this listing.
 */
89 void run_job(JCR *jcr)
95 workq_ele_t *work_item;
98 sm_check(__FILE__, __LINE__, True);
99 init_msg(jcr, jcr->messages);
/* Fills in jcr->Job and jcr->start_time (see create_unique_job_name) */
100 create_unique_job_name(jcr, jcr->job->hdr.name);
/* Mirror the scheduling data of the JCR into the catalog job record */
101 jcr->jr.SchedTime = jcr->sched_time;
102 jcr->jr.StartTime = jcr->start_time;
103 jcr->jr.Type = jcr->JobType;
104 jcr->jr.Level = jcr->JobLevel;
105 jcr->jr.JobStatus = jcr->JobStatus;
/* NOTE(review): unbounded strcpy(); assumes jr.Name and jr.Job are large
 * enough for the resource name and for jcr->Job -- confirm buffer sizes */
106 strcpy(jcr->jr.Name, jcr->job->hdr.name);
107 strcpy(jcr->jr.Job, jcr->Job);
109 /* Initialize termination condition variable */
110 if ((errstat = pthread_cond_init(&jcr->term_wait, NULL)) != 0) {
111 Jmsg1(jcr, M_FATAL, 0, _("Unable to init job cond variable: ERR=%s\n"), strerror(errstat));
112 set_jcr_job_status(jcr, JS_ErrorTerminated);
/* Open the catalog described by the job's catalog resource */
120 Dmsg0(50, "Open database\n");
121 jcr->db=db_init_database(jcr, jcr->catalog->db_name, jcr->catalog->db_user,
122 jcr->catalog->db_password, jcr->catalog->db_address,
123 jcr->catalog->db_port, jcr->catalog->db_socket);
124 if (!db_open_database(jcr, jcr->db)) {
/* Report the database error text, then close the handle again */
125 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
126 db_close_database(jcr, jcr->db);
127 set_jcr_job_status(jcr, JS_ErrorTerminated);
131 Dmsg0(50, "DB opened\n");
/* Refresh the status in the record, then create the job record */
136 jcr->jr.JobStatus = jcr->JobStatus;
137 if (!db_create_job_record(jcr, jcr->db, &jcr->jr)) {
138 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
139 db_close_database(jcr, jcr->db);
140 set_jcr_job_status(jcr, JS_ErrorTerminated);
/* The JobId is read back from the record just created */
144 jcr->JobId = jcr->jr.JobId;
145 ASSERT(jcr->jr.JobId > 0);
147 Dmsg4(30, "Created job record JobId=%d Name=%s Type=%c Level=%c\n",
148 jcr->JobId, jcr->Job, jcr->jr.Type, jcr->jr.Level);
149 Dmsg0(200, "Add jrc to work queue\n");
/* NOTE(review): both a direct pthread_create() of job_thread and a
 * workq_add() of the same jcr are visible; lines between them are missing,
 * so presumably only one of the two is active in a given build -- confirm */
152 if ((stat = pthread_create(&tid, NULL, job_thread, (void *)jcr)) != 0) {
153 Emsg1(M_ABORT, 0, _("Unable to create job thread: ERR=%s\n"), strerror(stat));
156 /* Queue the job to be run */
157 if ((stat = workq_add(&job_wq, (void *)jcr, &work_item, 0)) != 0) {
158 Emsg1(M_ABORT, 0, _("Could not add job to work queue: ERR=%s\n"), strerror(stat));
/* Keep the queue element returned by workq_add() in the JCR */
160 jcr->work_item = work_item;
162 Dmsg0(200, "Done run_job()\n");
166 * This is the engine called by workq_add() when we were pulled
167 * from the work queue.
168 * At this point, we are running in our own thread
/*
 * Thread body that runs one job.
 *
 *   arg - the job's JCR, passed as void *
 *
 * Visible sequence: detach the thread, acquire the resource locks, record
 * the start time, then either close the job record (job already cancelled,
 * or the configured MaxStartDelay was exceeded) or mark the job running,
 * run the optional RunBeforeJob program, dispatch on jcr->JobType, run the
 * optional RunAfterJob program, and finally release the resource locks.
 *
 * NOTE(review): the declarations of now and status, the case labels of the
 * switch, the calls made inside each case and the return statement are not
 * visible in this listing.
 */
170 static void *job_thread(void *arg)
173 JCR *jcr = (JCR *)arg;
/* Nobody joins this thread, so it detaches itself */
175 pthread_detach(pthread_self());
177 sm_check(__FILE__, __LINE__, True);
/* A failed acquisition marks the job as cancelled */
179 if (!acquire_resource_locks(jcr)) {
180 set_jcr_job_status(jcr, JS_Cancelled);
183 Dmsg0(200, "=====Start Job=========\n");
/* NOTE(review): now is not assigned on any visible line -- confirm it is
 * set before this point */
184 jcr->start_time = now; /* set the real start time */
185 Dmsg2(200, "jcr->JobStatus=%d %c\n", jcr->JobStatus, (char)jcr->JobStatus);
186 if (job_cancelled(jcr)) {
187 update_job_end_record(jcr);
/* MaxStartDelay == 0 disables the check; otherwise the job is cancelled
 * when start_time - sched_time is larger than MaxStartDelay */
188 } else if (jcr->job->MaxStartDelay != 0 && jcr->job->MaxStartDelay <
189 (utime_t)(jcr->start_time - jcr->sched_time)) {
190 Jmsg(jcr, M_FATAL, 0, _("Job cancelled because max start delay time exceeded.\n"));
191 set_jcr_job_status(jcr, JS_Cancelled);
192 update_job_end_record(jcr);
196 set_jcr_job_status(jcr, JS_Running);
/* Optional pre-job program: expand the edit codes into a pool buffer,
 * run the result, then release the buffer */
198 if (jcr->job->RunBeforeJob) {
199 POOLMEM *before = get_pool_memory(PM_FNAME);
202 before = edit_run_codes(jcr, before, jcr->job->RunBeforeJob);
/* NOTE(review): no visible line inspects status after run_program() */
203 status = run_program(before, 0, NULL);
204 free_pool_memory(before);
/* Dispatch on the job type; each visible case tests for JS_Terminated */
206 switch (jcr->JobType) {
209 if (jcr->JobStatus == JS_Terminated) {
215 if (jcr->JobStatus == JS_Terminated) {
221 if (jcr->JobStatus == JS_Terminated) {
228 set_jcr_job_status(jcr, JS_Terminated);
231 Pmsg1(0, "Unimplemented job type: %d\n", jcr->JobType);
/* Optional post-job program, handled like RunBeforeJob */
234 if (jcr->job->RunAfterJob) {
235 POOLMEM *after = get_pool_memory(PM_FNAME);
238 after = edit_run_codes(jcr, after, jcr->job->RunAfterJob);
239 status = run_program(after, 0, NULL);
240 free_pool_memory(after);
243 release_resource_locks(jcr);
244 Dmsg0(50, "Before free jcr\n");
246 Dmsg0(50, "======== End Job ==========\n");
247 sm_check(__FILE__, __LINE__, True);
/*
 * Acquire the semaphores a job needs before it may run: the Job, Client
 * and Storage resource semaphores and the global job_lock.
 *
 *   jcr - job whose job, client and store resources are locked
 *
 * The job status is set to JS_WaitJobRes, JS_WaitClientRes, JS_WaitStoreRes
 * and JS_WaitMaxJobs in turn, just before the matching lock attempt.
 * job_thread() treats a zero return value as failure.
 *
 * NOTE(review): the loop/retry structure, the branches that separate
 * backoff_resource_locks() from the M_ABORT messages, any locking of mutex
 * and the return statements are not visible in this listing.
 */
251 static int acquire_resource_locks(JCR *jcr)
/* Each resource semaphore is initialised on first use, sized by that
 * resource's MaxConcurrentJobs.
 * NOTE(review): whether this lazy initialisation is serialised between
 * job threads is not visible here -- confirm */
256 if (jcr->store->sem.valid != SEMLOCK_VALID) {
257 if ((stat = sem_init(&jcr->store->sem, jcr->store->MaxConcurrentJobs)) != 0) {
258 Emsg1(M_ABORT, 0, _("Could not init Storage semaphore: ERR=%s\n"), strerror(stat));
261 if (jcr->client->sem.valid != SEMLOCK_VALID) {
262 if ((stat = sem_init(&jcr->client->sem, jcr->client->MaxConcurrentJobs)) != 0) {
263 Emsg1(M_ABORT, 0, _("Could not init Client semaphore: ERR=%s\n"), strerror(stat));
266 if (jcr->job->sem.valid != SEMLOCK_VALID) {
267 if ((stat = sem_init(&jcr->job->sem, jcr->job->MaxConcurrentJobs)) != 0) {
268 Emsg1(M_ABORT, 0, _("Could not init Job semaphore: ERR=%s\n"), strerror(stat));
273 /* Acquire semaphore */
274 set_jcr_job_status(jcr, JS_WaitJobRes);
/* The Job semaphore is taken with sem_lock(); the three that follow use
 * sem_trylock() */
275 if ((stat = sem_lock(&jcr->job->sem)) != 0) {
276 Emsg1(M_ABORT, 0, _("Could not acquire Job max jobs lock: ERR=%s\n"), strerror(stat));
278 set_jcr_job_status(jcr, JS_WaitClientRes);
279 if ((stat = sem_trylock(&jcr->client->sem)) != 0) {
/* count tells backoff_resource_locks() how many locks are already held */
281 backoff_resource_locks(jcr, 1);
284 Emsg1(M_ABORT, 0, _("Could not acquire Client max jobs lock: ERR=%s\n"), strerror(stat));
287 set_jcr_job_status(jcr, JS_WaitStoreRes);
288 if ((stat = sem_trylock(&jcr->store->sem)) != 0) {
290 backoff_resource_locks(jcr, 2);
293 Emsg1(M_ABORT, 0, _("Could not acquire Storage max jobs lock: ERR=%s\n"), strerror(stat));
296 set_jcr_job_status(jcr, JS_WaitMaxJobs);
297 if ((stat = sem_trylock(&job_lock)) != 0) {
299 backoff_resource_locks(jcr, 3);
302 Emsg1(M_ABORT, 0, _("Could not acquire max jobs lock: ERR=%s\n"), strerror(stat));
309 /* Wait for some resource to be released */
/* NOTE(review): pthread_cond_wait() requires mutex to be locked by the
 * calling thread; the lock call is not visible in this listing -- confirm */
310 pthread_cond_wait(&resource_wait, &mutex);
/*
 * Give back resource semaphores already taken by acquire_resource_locks()
 * after a later lock attempt failed.
 *
 *   jcr   - job whose store, client and job semaphores are unlocked
 *   count - callers pass 1, 2 or 3, the number of locks held so far
 *
 * The unlocks run in the order Storage, Client, Job.
 *
 * NOTE(review): the lines between the unlock calls are not visible in this
 * listing; presumably they select on count how many unlocks run -- confirm.
 */
319 static void backoff_resource_locks(JCR *jcr, int count)
323 sem_unlock(&jcr->store->sem);
325 sem_unlock(&jcr->client->sem);
327 sem_unlock(&jcr->job->sem);
/*
 * Release all four semaphores held by a job (Storage, Client, Job and the
 * global job_lock) and signal resource_wait so that a thread waiting in
 * acquire_resource_locks() can retry.
 *
 *   jcr - job whose resource semaphores are released
 *
 * NOTE(review): lines before the first unlock and after the signal are not
 * visible in this listing. pthread_cond_signal() is only guaranteed to wake
 * one waiter; if several jobs can wait on different resources,
 * pthread_cond_broadcast() may be what is needed -- confirm.
 */
333 static void release_resource_locks(JCR *jcr)
337 sem_unlock(&jcr->store->sem);
338 sem_unlock(&jcr->client->sem);
339 sem_unlock(&jcr->job->sem);
340 sem_unlock(&job_lock);
341 pthread_cond_signal(&resource_wait);
347 * Get or create a Client record for this Job
/*
 * Build a client record from the job's Client resource, pass it to
 * db_create_client_record(), and copy the resulting ClientId and Uname
 * back into the JCR.
 *
 *   jcr - job whose client resource and database handle are used
 *
 * NOTE(review): the declaration of cr, the return statements and the end
 * of the final Dmsg2() call are not visible in this listing, so the return
 * values cannot be documented from here.
 */
349 int get_or_create_client_record(JCR *jcr)
/* Start from an all-zero record, then fill in name and pruning settings */
353 memset(&cr, 0, sizeof(cr));
/* NOTE(review): unbounded strcpy(); assumes cr.Name can hold the resource
 * name -- confirm buffer size */
354 strcpy(cr.Name, jcr->client->hdr.name);
355 cr.AutoPrune = jcr->client->AutoPrune;
356 cr.FileRetention = jcr->client->FileRetention;
357 cr.JobRetention = jcr->client->JobRetention;
/* Replace any previous client_name with a fresh copy of the resource name */
358 if (jcr->client_name) {
359 free_pool_memory(jcr->client_name);
361 jcr->client_name = get_memory(strlen(jcr->client->hdr.name) + 1);
362 strcpy(jcr->client_name, jcr->client->hdr.name);
363 if (!db_create_client_record(jcr, jcr->db, &cr)) {
364 Jmsg(jcr, M_FATAL, 0, _("Could not create Client record. %s"),
365 db_strerror(jcr->db));
/* cr.ClientId is read back after the call and stored in the job record */
368 jcr->jr.ClientId = cr.ClientId;
/* Replace any previous client_uname with a copy of cr.Uname */
370 if (jcr->client_uname) {
371 free_pool_memory(jcr->client_uname);
373 jcr->client_uname = get_memory(strlen(cr.Uname) + 1);
374 strcpy(jcr->client_uname, cr.Uname);
376 Dmsg2(100, "Created Client %s record %d\n", jcr->client->hdr.name,
383 * Write status and such in DB
385 void update_job_end_record(JCR *jcr)
387 if (jcr->jr.EndTime == 0) {
388 jcr->jr.EndTime = time(NULL);
390 jcr->end_time = jcr->jr.EndTime;
391 jcr->jr.JobId = jcr->JobId;
392 jcr->jr.JobStatus = jcr->JobStatus;
393 jcr->jr.JobFiles = jcr->JobFiles;
394 jcr->jr.JobBytes = jcr->JobBytes;
395 jcr->jr.VolSessionId = jcr->VolSessionId;
396 jcr->jr.VolSessionTime = jcr->VolSessionTime;
397 if (!db_update_job_end_record(jcr, jcr->db, &jcr->jr)) {
398 Jmsg(jcr, M_WARNING, 0, _("Error updating job record. %s"),
399 db_strerror(jcr->db));
404 * Takes base_name and appends (unique) current
405 * date and time to form unique job name.
407 * Returns: unique job name in jcr->Job
408 * date/time in jcr->start_time
/*
 * Build the job name "<base_name>.<YYYY-MM-DD_HH.MM.SS>" in jcr->Job and
 * store the time used in jcr->start_time.
 *
 *   jcr       - job control record that receives Job and start_time
 *   base_name - name the timestamp is appended to
 *
 * NOTE(review): the declarations of now, tm and p, the statements that set
 * now, and the bodies of the while and for loops are not visible in this
 * listing.
 */
410 void create_unique_job_name(JCR *jcr, char *base_name)
412 /* Job start mutex */
/* Function-local statics: this mutex is distinct from the file-scope one,
 * and last_start_time remembers the time given to the previous job */
413 static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
414 static time_t last_start_time = 0;
417 char dt[MAX_TIME_LENGTH];
418 char name[MAX_NAME_LENGTH];
421 /* Guarantee unique start time -- maximum one per second, and
422 * thus unique Job Name
424 P(mutex); /* lock creation of jobs */
/* Loop for as long as now equals the previously issued start time */
426 while (now == last_start_time) {
430 last_start_time = now;
431 V(mutex); /* allow creation of jobs */
432 jcr->start_time = now;
433 /* Form Unique JobName */
434 localtime_r(&now, &tm);
435 /* Use only characters that are permitted in Windows filenames */
436 strftime(dt, sizeof(dt), "%Y-%m-%d_%H.%M.%S", &tm);
437 bstrncpy(name, base_name, sizeof(name));
/* Cut the base name so that '.', the 19-character timestamp and the
 * terminating NUL still fit within sizeof(name) bytes.
 * NOTE(review): assumes jcr->Job is at least sizeof(name) bytes -- confirm */
438 name[sizeof(name)-22] = 0; /* truncate if too long */
439 sprintf(jcr->Job, "%s.%s", name, dt); /* add date & time */
440 /* Convert spaces into underscores */
441 for (p=jcr->Job; *p; p++) {
449 * Free the Job Control Record if no one is still using it.
450 * Called from main free_jcr() routine in src/lib/jcr.c so
451 * that we can do our Director specific cleanup of the jcr.
/*
 * Director-specific cleanup of a JCR: close the File and Storage daemon
 * sockets if they exist, release the fname and stime pool buffers, close
 * the catalog database and free the restore strings.
 *
 *   jcr - job control record being torn down
 *
 * NOTE(review): the lines just before the fname, stime and database
 * cleanup are not visible in this listing; presumably they guard those
 * calls against NULL members -- confirm. No visible line resets the
 * released members to NULL.
 */
453 void dird_free_jcr(JCR *jcr)
455 Dmsg0(200, "Start dird free_jcr\n");
457 if (jcr->file_bsock) {
458 Dmsg0(200, "Close File bsock\n");
459 bnet_close(jcr->file_bsock);
461 if (jcr->store_bsock) {
462 Dmsg0(200, "Close Store bsock\n");
463 bnet_close(jcr->store_bsock);
466 Dmsg0(200, "Free JCR fname\n");
467 free_pool_memory(jcr->fname);
470 Dmsg0(200, "Free JCR stime\n");
471 free_pool_memory(jcr->stime);
474 Dmsg0(200, "Close DB\n");
475 db_close_database(jcr, jcr->db);
/* RestoreWhere and RestoreBootstrap are released with plain free(), unlike
 * the pool buffers above */
477 if (jcr->RestoreWhere) {
478 free(jcr->RestoreWhere);
480 if (jcr->RestoreBootstrap) {
481 free(jcr->RestoreBootstrap);
483 Dmsg0(200, "End dird free_jcr\n");
487 * Set some defaults in the JCR necessary to
488 * run. These items are pulled from the job
489 * definition as defaults, but can be overridden
490 * later either by the Run record in the Schedule resource,
491 * or by the Console program.
/*
 * Copy the defaults of a Job resource into the JCR: type, level, storage,
 * client (plus a copy of the client's name), pool, catalog, fileset,
 * messages and restore bootstrap. When the job has no level (JobLevel == 0)
 * a default is chosen according to the job type.
 *
 *   jcr - job control record to fill in
 *   job - Job resource that supplies the defaults
 *
 * NOTE(review): at least one line at the top of the body and the case
 * labels of the switch are not visible in this listing.
 */
493 void set_jcr_defaults(JCR *jcr, JOB *job)
496 jcr->JobType = job->JobType;
497 jcr->JobLevel = job->level;
498 jcr->store = job->storage;
499 jcr->client = job->client;
/* Replace any previous client_name with a copy of the new client's name */
500 if (jcr->client_name) {
501 free_pool_memory(jcr->client_name);
503 jcr->client_name = get_memory(strlen(jcr->client->hdr.name) + 1);
504 strcpy(jcr->client_name, jcr->client->hdr.name);
505 jcr->pool = job->pool;
/* The catalog is taken from the job's client resource */
506 jcr->catalog = job->client->catalog;
507 jcr->fileset = job->fileset;
508 jcr->messages = job->messages;
/* NOTE(review): the old RestoreBootstrap is freed here, but no visible
 * line resets the pointer to NULL. If job->RestoreBootstrap is NULL the
 * JCR would keep a dangling pointer that dird_free_jcr() frees a second
 * time -- confirm and reset it after free() if so. */
509 if (jcr->RestoreBootstrap) {
510 free(jcr->RestoreBootstrap);
512 /* This can be overridden by Console program */
513 if (job->RestoreBootstrap) {
514 jcr->RestoreBootstrap = bstrdup(job->RestoreBootstrap);
516 /* If no default level given, set one */
517 if (jcr->JobLevel == 0) {
518 switch (jcr->JobType) {
/* Two defaults are visible; the job types they belong to are on lines
 * missing from this listing */
520 jcr->JobLevel = L_VERIFY_CATALOG;
523 jcr->JobLevel = L_INCREMENTAL;
532 * Edit codes into Run command
535 * %d = Director's name
543 * omsg = edited output message
544 * imsg = input string containing edit codes (%x)
/*
 * Expand the edit codes found in imsg and append the result to omsg.
 *
 *   jcr  - job that supplies the substituted values (client name, job
 *          status, JobId, level, job name and job type are visible)
 *   omsg - pool memory buffer that receives the output; pm_strcat() is
 *          given its address, and job_thread() stores the returned pointer
 *          back into its own variable
 *   imsg - input string scanned one character at a time
 *
 * NOTE(review): the declarations of p, str and add, the test for the code
 * introducer, the case labels that map each code letter to a value, and
 * the end of the function (including the return statement) are not visible
 * in this listing.
 */
547 static char *edit_run_codes(JCR *jcr, char *omsg, char *imsg)
554 Dmsg1(200, "edit_run_codes: %s\n", imsg);
555 for (p=imsg; *p; p++) {
562 str = jcr->client_name;
571 str = job_status_to_str(jcr->JobStatus);
/* The JobId is formatted as decimal text into the add buffer */
574 sprintf(add, "%d", jcr->JobId);
581 str = job_level_to_str(jcr->JobLevel);
584 str = jcr->job->hdr.name;
587 str = job_type_to_str(jcr->JobType);
/* Append the piece selected above to the output buffer */
601 Dmsg1(200, "add_str %s\n", str);
602 pm_strcat(&omsg, (char *)str);
603 Dmsg1(200, "omsg=%s\n", omsg);