* Version $Id$
*/
/*
- Copyright (C) 2000-2004 Kern Sibbald and John Walker
+ Copyright (C) 2000-2005 Kern Sibbald
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
static bool job_check_maxwaittime(JCR *control_jcr, JCR *jcr);
static bool job_check_maxruntime(JCR *control_jcr, JCR *jcr);
-/* Exported subroutines */
-
/* Imported subroutines */
extern void term_scheduler();
extern void term_ua_server();
-extern int do_backup(JCR *jcr);
-extern int do_admin(JCR *jcr);
-extern int do_restore(JCR *jcr);
-extern int do_verify(JCR *jcr);
/* Imported variables */
extern time_t watchdog_time;
-jobq_t job_queue;
+jobq_t job_queue;
void init_job_server(int max_workers)
{
int stat;
watchdog_t *wd;
-
+   /* Set up the global job queue, then register the job monitor watchdog */
if ((stat = jobq_init(&job_queue, max_workers, job_thread)) != 0) {
- Emsg1(M_ABORT, 0, _("Could not init job queue: ERR=%s\n"), strerror(stat));
- }
- if ((wd = new_watchdog()) == NULL) {
- Emsg0(M_ABORT, 0, _("Could not init job monitor watchdogs\n"));
+ berrno be;   /* supplies strerror() for the status code returned by jobq_init() */
+ Emsg1(M_ABORT, 0, _("Could not init job queue: ERR=%s\n"), be.strerror(stat));
}
+ wd = new_watchdog();   /* NOTE(review): the NULL check was dropped here -- presumably new_watchdog() cannot return NULL; confirm */
wd->callback = job_monitor_watchdog;
wd->destructor = job_monitor_destructor;
wd->one_shot = false;
register_watchdog(wd);
}
+/*
+ * Tear down the global job queue.
+ * Whatever status jobq_destroy() reports is deliberately discarded.
+ */
+void term_job_server()
+{
+   (void)jobq_destroy(&job_queue);
+}
+
/*
* Run a job -- typically called by the scheduler, but may also
* be called by the UA (Console program).
*
+ * Returns: 0 on failure
+ * JobId on success
+ *
*/
-void run_job(JCR *jcr)
+JobId_t run_job(JCR *jcr)
{
int stat, errstat;
+ JobId_t JobId = 0;
P(jcr->mutex);
sm_check(__FILE__, __LINE__, true);
/* Initialize termination condition variable */
if ((errstat = pthread_cond_init(&jcr->term_wait, NULL)) != 0) {
- Jmsg1(jcr, M_FATAL, 0, _("Unable to init job cond variable: ERR=%s\n"), strerror(errstat));
+ berrno be;
+ Jmsg1(jcr, M_FATAL, 0, _("Unable to init job cond variable: ERR=%s\n"), be.strerror(errstat));
goto bail_out;
}
jcr->term_wait_inited = true;
Dmsg0(50, "Open database\n");
jcr->db=db_init_database(jcr, jcr->catalog->db_name, jcr->catalog->db_user,
jcr->catalog->db_password, jcr->catalog->db_address,
- jcr->catalog->db_port, jcr->catalog->db_socket);
+ jcr->catalog->db_port, jcr->catalog->db_socket,
+ jcr->catalog->mult_db_connections);
if (!jcr->db || !db_open_database(jcr, jcr->db)) {
Jmsg(jcr, M_FATAL, 0, _("Could not open database \"%s\".\n"),
jcr->catalog->db_name);
Dmsg0(50, "DB opened\n");
/*
- * Create Job record
+ * Create Job record
*/
create_unique_job_name(jcr, jcr->job->hdr.name);
set_jcr_job_status(jcr, JS_Created);
Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
goto bail_out;
}
- jcr->JobId = jcr->jr.JobId;
+ JobId = jcr->JobId = jcr->jr.JobId;
- Dmsg4(100, "Created job record JobId=%d Name=%s Type=%c Level=%c\n",
+ Dmsg4(100, "Created job record JobId=%d Name=%s Type=%c Level=%c\n",
jcr->JobId, jcr->Job, jcr->jr.JobType, jcr->jr.JobLevel);
+
+ if (!get_or_create_client_record(jcr)) {
+ goto bail_out;
+ }
+
+ if (!jcr->fname) {
+ jcr->fname = get_pool_memory(PM_FNAME);
+ }
+
+ /* Now, do pre-run stuff, like setting job level (Inc/diff, ...) */
+ switch (jcr->JobType) {
+ case JT_BACKUP:
+ if (!do_backup_init(jcr)) {
+ backup_cleanup(jcr, JS_ErrorTerminated);
+ }
+ break;
+ case JT_VERIFY:
+ if (!do_verify_init(jcr)) {
+ verify_cleanup(jcr, JS_ErrorTerminated);
+ }
+ break;
+ case JT_RESTORE:
+ if (!do_restore_init(jcr)) {
+ restore_cleanup(jcr, JS_ErrorTerminated);
+ }
+ break;
+ case JT_ADMIN:
+ if (!do_admin_init(jcr)) {
+ admin_cleanup(jcr, JS_ErrorTerminated);
+ }
+ break;
+ case JT_MIGRATION:
+ case JT_COPY:
+ case JT_ARCHIVE:
+ if (!do_mac_init(jcr)) { /* migration, archive, copy */
+ mac_cleanup(jcr, JS_ErrorTerminated);
+ }
+ break;
+ default:
+ Pmsg1(0, "Unimplemented job type: %d\n", jcr->JobType);
+ set_jcr_job_status(jcr, JS_ErrorTerminated);
+ break;
+ }
+ if (job_canceled(jcr)) {
+ goto bail_out;
+ }
+
Dmsg0(200, "Add jrc to work queue\n");
/* Queue the job to be run */
if ((stat = jobq_add(&job_queue, jcr)) != 0) {
- Jmsg(jcr, M_FATAL, 0, _("Could not add job queue: ERR=%s\n"), strerror(stat));
+ berrno be;
+ Jmsg(jcr, M_FATAL, 0, _("Could not add job queue: ERR=%s\n"), be.strerror(stat));
+ JobId = 0;
goto bail_out;
}
Dmsg0(100, "Done run_job()\n");
V(jcr->mutex);
- return;
+ return JobId;
bail_out:
- set_jcr_job_status(jcr, JS_ErrorTerminated);
+ if (jcr->fname) {
+ free_memory(jcr->fname);
+ jcr->fname = NULL;
+ }
V(jcr->mutex);
- return;
+ return JobId;
}
-/*
- * This is the engine called by jobq.c:jobq_add() when we were pulled
+/*
+ * This is the engine called by jobq.c:jobq_add() when we were pulled
* from the work queue.
* At this point, we are running in our own thread and all
* necessary resources are allocated -- see jobq.c
for ( ;; ) {
Dmsg0(200, "=====Start Job=========\n");
jcr->start_time = time(NULL); /* set the real start time */
+ jcr->jr.StartTime = jcr->start_time;
set_jcr_job_status(jcr, JS_Running);
+ if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
+ Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
+ }
if (job_canceled(jcr)) {
update_job_end_record(jcr);
} else {
/* Run Job */
+ generate_event(jcr, "StartJob");
if (jcr->job->RunBeforeJob) {
POOLMEM *before = get_pool_memory(PM_FNAME);
int status;
BPIPE *bpipe;
char line[MAXSTRING];
-
+
before = edit_job_codes(jcr, before, jcr->job->RunBeforeJob, "");
bpipe = open_bpipe(before, 0, "r");
free_pool_memory(before);
}
status = close_bpipe(bpipe);
if (status != 0) {
- Jmsg(jcr, M_FATAL, 0, _("RunBeforeJob returned non-zero status=%d\n"),
- status);
+ berrno be;
+ Jmsg(jcr, M_FATAL, 0, _("RunBeforeJob error: ERR=%s\n"), be.strerror(status));
set_jcr_job_status(jcr, JS_FatalError);
update_job_end_record(jcr);
goto bail_out;
}
switch (jcr->JobType) {
case JT_BACKUP:
- do_backup(jcr);
- if (jcr->JobStatus == JS_Terminated) {
+ if (do_backup(jcr)) {
do_autoprune(jcr);
+ } else {
+ backup_cleanup(jcr, JS_ErrorTerminated);
}
break;
case JT_VERIFY:
- do_verify(jcr);
- if (jcr->JobStatus == JS_Terminated) {
+ if (do_verify(jcr)) {
do_autoprune(jcr);
+ } else {
+ verify_cleanup(jcr, JS_ErrorTerminated);
}
break;
case JT_RESTORE:
- do_restore(jcr);
- if (jcr->JobStatus == JS_Terminated) {
+ if (do_restore(jcr)) {
do_autoprune(jcr);
+ } else {
+ restore_cleanup(jcr, JS_ErrorTerminated);
}
break;
case JT_ADMIN:
- do_admin(jcr);
- if (jcr->JobStatus == JS_Terminated) {
+ if (do_admin(jcr)) {
+ do_autoprune(jcr);
+ } else {
+ admin_cleanup(jcr, JS_ErrorTerminated);
+ }
+ break;
+ case JT_MIGRATION:
+ case JT_COPY:
+ case JT_ARCHIVE:
+ if (do_mac(jcr)) { /* migration, archive, copy */
do_autoprune(jcr);
+ } else {
+ mac_cleanup(jcr, JS_ErrorTerminated);
}
break;
default:
int status;
BPIPE *bpipe;
char line[MAXSTRING];
-
+
if (jcr->JobStatus == JS_Terminated) {
after = edit_job_codes(jcr, after, jcr->job->RunAfterJob, "");
} else {
status = close_bpipe(bpipe);
/*
* Note, if we get an error here, do not mark the
- * job in error, simply report the error condition.
+ * job in error, simply report the error condition.
*/
if (status != 0) {
+ berrno be;
if (jcr->JobStatus == JS_Terminated) {
- Jmsg(jcr, M_WARNING, 0, _("RunAfterJob returned non-zero status=%d\n"),
- status);
+ Jmsg(jcr, M_WARNING, 0, _("RunAfterJob error: ERR=%s\n"), be.strerror(status));
} else {
- Jmsg(jcr, M_FATAL, 0, _("RunAfterFailedJob returned non-zero status=%d\n"),
- status);
+ Jmsg(jcr, M_FATAL, 0, _("RunAfterFailedJob error: ERR=%s\n"), be.strerror(status));
}
}
}
+ generate_event(jcr, "EndJob");
/* Send off any queued messages */
if (jcr->msg_queue->size() > 0) {
dequeue_messages(jcr);
break;
}
- Dmsg0(50, "======== End Job ==========\n");
+ Dmsg1(50, "======== End Job stat=%c ==========\n", jcr->JobStatus);
sm_check(__FILE__, __LINE__, true);
return NULL;
}
/*
* Cancel a job -- typically called by the UA (Console program), but may also
* be called by the job watchdog.
- *
+ *
* Returns: 1 if cancel appears to be successful
* 0 on failure. Message sent to ua->jcr.
*/
jcr->JobId, jcr->Job);
jobq_remove(&job_queue, jcr); /* attempt to remove it from queue */
return 1;
-
+
default:
set_jcr_job_status(jcr, JS_Canceled);
/* Cancel Storage daemon */
if (jcr->store_bsock) {
- ua->jcr->store = jcr->store;
+ if (!ua->jcr->storage) {
+ copy_storage(ua->jcr, jcr);
+ } else {
+ set_storage(ua->jcr, jcr->store);
+ }
if (!connect_to_storage_daemon(ua->jcr, 10, SDConnectTimeout, 1)) {
bsendmsg(ua, _("Failed to connect to Storage daemon.\n"));
return 0;
control_jcr = (JCR *)self->data;
- Dmsg1(400, "job_monitor_watchdog %p called\n", self);
+ Dmsg1(800, "job_monitor_watchdog %p called\n", self);
lock_jcr_chain();
bool cancel;
if (jcr->JobId == 0) {
- Dmsg2(400, "Skipping JCR %p (%s) with JobId 0\n",
+ Dmsg2(800, "Skipping JCR %p (%s) with JobId 0\n",
jcr, jcr->Job);
/* Keep reference counts correct */
free_locked_jcr(jcr);
cancel |= job_check_maxruntime(control_jcr, jcr);
if (cancel) {
- Dmsg3(200, "Cancelling JCR %p jobid %d (%s)\n",
+ Dmsg3(800, "Cancelling JCR %p jobid %d (%s)\n",
jcr, jcr->JobId, jcr->Job);
UAContext *ua = new_ua_context(jcr);
cancel_job(ua, jcr);
free_ua_context(ua);
- Dmsg1(200, "Have cancelled JCR %p\n", jcr);
+ Dmsg1(800, "Have cancelled JCR %p\n", jcr);
}
/* Keep reference counts correct */
static bool job_check_maxwaittime(JCR *control_jcr, JCR *jcr)
{
bool cancel = false;
+ bool ok_to_cancel = false;
+ JOB *job = jcr->job;
- if (jcr->job->MaxWaitTime == 0) {
+ if (job->MaxWaitTime == 0 && job->FullMaxWaitTime == 0 &&
+ job->IncMaxWaitTime == 0 && job->DiffMaxWaitTime == 0) {
return false;
- }
- if ((watchdog_time - jcr->start_time) < jcr->job->MaxWaitTime) {
- Dmsg3(200, "Job %p (%s) with MaxWaitTime %d not expired\n",
- jcr, jcr->Job, jcr->job->MaxWaitTime);
+ }
+ if (jcr->JobLevel == L_FULL && job->FullMaxWaitTime != 0 &&
+ (watchdog_time - jcr->start_time) >= job->FullMaxWaitTime) {
+ ok_to_cancel = true;
+ } else if (jcr->JobLevel == L_DIFFERENTIAL && job->DiffMaxWaitTime != 0 &&
+ (watchdog_time - jcr->start_time) >= job->DiffMaxWaitTime) {
+ ok_to_cancel = true;
+ } else if (jcr->JobLevel == L_INCREMENTAL && job->IncMaxWaitTime != 0 &&
+ (watchdog_time - jcr->start_time) >= job->IncMaxWaitTime) {
+ ok_to_cancel = true;
+ } else if (job->MaxWaitTime != 0 &&
+ (watchdog_time - jcr->start_time) >= job->MaxWaitTime) {
+ ok_to_cancel = true;
+ }
+ if (!ok_to_cancel) {
return false;
}
- Dmsg3(200, "Job %d (%s): MaxWaitTime of %d seconds exceeded, "
+ Dmsg3(800, "Job %d (%s): MaxWaitTime of %d seconds exceeded, "
"checking status\n",
- jcr->JobId, jcr->Job, jcr->job->MaxWaitTime);
+ jcr->JobId, jcr->Job, job->MaxWaitTime);
switch (jcr->JobStatus) {
case JS_Created:
case JS_Blocked:
Dmsg0(200, "JCR blocked in #1\n");
break;
case JS_Running:
- Dmsg0(200, "JCR running, checking SD status\n");
+ Dmsg0(800, "JCR running, checking SD status\n");
switch (jcr->SDJobStatus) {
case JS_WaitMount:
case JS_WaitMedia:
case JS_WaitFD:
cancel = true;
- Dmsg0(200, "JCR blocked in #2\n");
+ Dmsg0(800, "JCR blocked in #2\n");
break;
default:
- Dmsg0(200, "JCR not blocked in #2\n");
+ Dmsg0(800, "JCR not blocked in #2\n");
break;
}
break;
case JS_ErrorTerminated:
case JS_Canceled:
case JS_FatalError:
- Dmsg0(200, "JCR already dead in #3\n");
+ Dmsg0(800, "JCR already dead in #3\n");
break;
default:
Jmsg1(jcr, M_ERROR, 0, _("Unhandled job status code %d\n"),
jcr->JobStatus);
}
- Dmsg3(200, "MaxWaitTime result: %scancel JCR %p (%s)\n",
+ Dmsg3(800, "MaxWaitTime result: %scancel JCR %p (%s)\n",
cancel ? "" : "do not ", jcr, jcr->job);
return cancel;
if (!jcr->client_name) {
jcr->client_name = get_pool_memory(PM_NAME);
}
- pm_strcpy(&jcr->client_name, jcr->client->hdr.name);
+ pm_strcpy(jcr->client_name, jcr->client->hdr.name);
if (!db_create_client_record(jcr, jcr->db, &cr)) {
- Jmsg(jcr, M_FATAL, 0, _("Could not create Client record. ERR=%s\n"),
+ Jmsg(jcr, M_FATAL, 0, _("Could not create Client record. ERR=%s\n"),
db_strerror(jcr->db));
return false;
}
if (!jcr->client_uname) {
jcr->client_uname = get_pool_memory(PM_NAME);
}
- pm_strcpy(&jcr->client_uname, cr.Uname);
+ pm_strcpy(jcr->client_uname, cr.Uname);
}
- Dmsg2(100, "Created Client %s record %d\n", jcr->client->hdr.name,
+ Dmsg2(100, "Created Client %s record %d\n", jcr->client->hdr.name,
jcr->jr.ClientId);
return true;
}
} else {
Jmsg(jcr, M_WARNING, 0, _("FileSet MD5 signature not found.\n"));
}
- if (!db_create_fileset_record(jcr, jcr->db, fsr)) {
- Jmsg(jcr, M_ERROR, 0, _("Could not create FileSet \"%s\" record. ERR=%s\n"),
- fsr->FileSet, db_strerror(jcr->db));
- return false;
- }
+ if (!jcr->fileset->ignore_fs_changes ||
+ !db_get_fileset_record(jcr, jcr->db, fsr)) {
+ if (!db_create_fileset_record(jcr, jcr->db, fsr)) {
+ Jmsg(jcr, M_ERROR, 0, _("Could not create FileSet \"%s\" record. ERR=%s\n"),
+ fsr->FileSet, db_strerror(jcr->db));
+ return false;
+ }
+ }
jcr->jr.FileSetId = fsr->FileSetId;
if (fsr->created) {
- Jmsg(jcr, M_INFO, 0, _("Created new FileSet record \"%s\" %s\n"),
+ Jmsg(jcr, M_INFO, 0, _("Created new FileSet record \"%s\" %s\n"),
fsr->FileSet, fsr->cCreateTime);
}
- Dmsg2(119, "Created FileSet %s record %u\n", jcr->fileset->hdr.name,
+ Dmsg2(119, "Created FileSet %s record %u\n", jcr->fileset->hdr.name,
jcr->jr.FileSetId);
return true;
}
jcr->jr.VolSessionId = jcr->VolSessionId;
jcr->jr.VolSessionTime = jcr->VolSessionTime;
if (!db_update_job_end_record(jcr, jcr->db, &jcr->jr)) {
- Jmsg(jcr, M_WARNING, 0, _("Error updating job record. %s"),
+ Jmsg(jcr, M_WARNING, 0, _("Error updating job record. %s"),
db_strerror(jcr->db));
}
}
char *p;
/* Guarantee unique start time -- maximum one per second, and
- * thus unique Job Name
+ * thus unique Job Name
*/
P(mutex); /* lock creation of jobs */
now = time(NULL);
/* Form Unique JobName */
localtime_r(&now, &tm);
/* Use only characters that are permitted in Windows filenames */
- strftime(dt, sizeof(dt), "%Y-%m-%d_%H.%M.%S", &tm);
+ strftime(dt, sizeof(dt), "%Y-%m-%d_%H.%M.%S", &tm);
bstrncpy(name, base_name, sizeof(name));
name[sizeof(name)-22] = 0; /* truncate if too long */
bsnprintf(jcr->Job, sizeof(jcr->Job), "%s.%s", name, dt); /* add date & time */
}
}
-/*
- * Free the Job Control Record if no one is still using it.
- * Called from main free_jcr() routine in src/lib/jcr.c so
- * that we can do our Director specific cleanup of the jcr.
- */
-void dird_free_jcr(JCR *jcr)
+/* Called directly from job rescheduling */
+void dird_free_jcr_pointers(JCR *jcr)
{
- Dmsg0(200, "Start dird free_jcr\n");
-
if (jcr->sd_auth_key) {
free(jcr->sd_auth_key);
jcr->sd_auth_key = NULL;
bnet_close(jcr->store_bsock);
jcr->store_bsock = NULL;
}
- if (jcr->fname) {
+ if (jcr->fname) {
Dmsg0(200, "Free JCR fname\n");
free_pool_memory(jcr->fname);
jcr->fname = NULL;
}
if (jcr->term_wait_inited) {
pthread_cond_destroy(&jcr->term_wait);
+ jcr->term_wait_inited = false;
+ }
+}
+
+/*
+ * Free the Job Control Record if no one is still using it.
+ * Called from main free_jcr() routine in src/lib/jcr.c so
+ * that we can do our Director specific cleanup of the jcr.
+ */
+void dird_free_jcr(JCR *jcr)
+{
+ Dmsg0(200, "Start dird free_jcr\n");
+
+ dird_free_jcr_pointers(jcr);
+
+ /* Delete lists setup to hold storage pointers */
+ if (jcr->storage) {
+ delete jcr->storage;
}
jcr->job_end_push.destroy();
Dmsg0(200, "End dird free_jcr\n");
*/
void set_jcr_defaults(JCR *jcr, JOB *job)
{
+ STORE *st;
jcr->job = job;
jcr->JobType = job->JobType;
switch (jcr->JobType) {
jcr->JobLevel = L_NONE;
break;
default:
- jcr->JobLevel = job->level;
+ jcr->JobLevel = job->JobLevel;
break;
}
jcr->JobPriority = job->Priority;
- jcr->store = job->storage;
+ /* Copy storage definitions -- deleted in dird_free_jcr above */
+ if (job->storage) {
+ if (jcr->storage) {
+ delete jcr->storage;
+ }
+ jcr->storage = New(alist(10, not_owned_by_alist));
+ foreach_alist(st, job->storage) {
+ jcr->storage->append(st);
+ }
+ }
+ if (jcr->storage) {
+ jcr->store = (STORE *)jcr->storage->first();
+ }
jcr->client = job->client;
if (!jcr->client_name) {
jcr->client_name = get_pool_memory(PM_NAME);
}
- pm_strcpy(&jcr->client_name, jcr->client->hdr.name);
+ pm_strcpy(jcr->client_name, jcr->client->hdr.name);
jcr->pool = job->pool;
jcr->full_pool = job->full_pool;
jcr->inc_pool = job->inc_pool;
jcr->dif_pool = job->dif_pool;
jcr->catalog = job->client->catalog;
jcr->fileset = job->fileset;
- jcr->messages = job->messages;
+ jcr->messages = job->messages;
jcr->spool_data = job->spool_data;
+ jcr->write_part_after_job = job->write_part_after_job;
if (jcr->RestoreBootstrap) {
free(jcr->RestoreBootstrap);
jcr->RestoreBootstrap = NULL;
if (job->RestoreBootstrap) {
jcr->RestoreBootstrap = bstrdup(job->RestoreBootstrap);
}
+ /* This can be overridden by Console program */
+ jcr->verify_job = job->verify_job;
/* If no default level given, set one */
if (jcr->JobLevel == 0) {
switch (jcr->JobType) {
}
}
}
+
+/*
+ * Copy the storage definitions from an old JCR to a new one.
+ *
+ *  If old_jcr has a storage list, new_jcr receives a fresh list
+ *  (not_owned_by_alist) holding the same STORE pointers, and any
+ *  list new_jcr held before is deleted.  old_jcr is left untouched.
+ *
+ *  new_jcr->store is set to old_jcr->store when that is set,
+ *  otherwise to the first entry of new_jcr's storage list (if any).
+ */
+void copy_storage(JCR *new_jcr, JCR *old_jcr)
+{
+   if (old_jcr->storage) {
+      STORE *st;
+      /*
+       * Build the copy before releasing the destination's previous
+       * list, so the source list is intact while we walk it (this
+       * also keeps new_jcr == old_jcr safe).
+       */
+      alist *copy = New(alist(10, not_owned_by_alist));
+      foreach_alist(st, old_jcr->storage) {
+         copy->append(st);
+      }
+      /* Release the destination's old list, never the source's */
+      if (new_jcr->storage) {
+         delete new_jcr->storage;
+      }
+      new_jcr->storage = copy;
+   }
+   if (old_jcr->store) {
+      new_jcr->store = old_jcr->store;
+   } else if (new_jcr->storage) {
+      new_jcr->store = (STORE *)new_jcr->storage->first();
+   }
+}
+
+/*
+ * Set storage override.
+ *
+ *  Makes store the current storage of the JCR (jcr->store) and
+ *  ensures it is present in jcr->storage: if it is not already in
+ *  the list it is prepended.  If the JCR has no storage list yet,
+ *  an empty one (not_owned_by_alist) is created first.
+ */
+void set_storage(JCR *jcr, STORE *store)
+{
+   STORE *storage;
+
+   jcr->store = store;
+   /* Guard against a JCR that never had a storage list set up */
+   if (!jcr->storage) {
+      jcr->storage = New(alist(10, not_owned_by_alist));
+   }
+   foreach_alist(storage, jcr->storage) {
+      if (store == storage) {
+         return;                      /* already in the list */
+      }
+   }
+   /* Store not in list, so add it */
+   jcr->storage->prepend(store);
+}