+/*
+ * Request cancellation of a job.  Normally invoked on behalf of the UA
+ * (Console program); the job watchdog may call it as well.
+ *
+ * A job whose status is JS_Created or one of the JS_Wait* queue states
+ * listed below is flagged JS_Canceled and an attempt is made to remove it
+ * from the job queue.  For every other status the job is flagged
+ * JS_Canceled and a "cancel Job=..." command is sent, over connections
+ * opened through ua->jcr, to the File daemon (when jcr->file_bsock is set)
+ * and to the Storage daemon (when jcr->store_bsock is set).
+ *
+ *  Returns: 1 if the cancel appears to be successful
+ *           0 if a daemon connection could not be made; a message
+ *             is sent through ua.
+ */
+int cancel_job(UAContext *ua, JCR *jcr)
+{
+   switch (jcr->JobStatus) {
+   case JS_Created:
+   case JS_WaitJobRes:
+   case JS_WaitClientRes:
+   case JS_WaitStoreRes:
+   case JS_WaitPriority:
+   case JS_WaitMaxJobs:
+   case JS_WaitStartTime:
+      set_jcr_job_status(jcr, JS_Canceled);
+      bsendmsg(ua, _("JobId %d, Job %s marked to be canceled.\n"),
+               jcr->JobId, jcr->Job);
+      /* Attempt to remove it from the queue */
+      jobq_remove(&job_queue, jcr);
+      return 1;
+
+   default:
+      /* Any other status: the daemons are notified below */
+      break;
+   }
+
+   set_jcr_job_status(jcr, JS_Canceled);
+
+   /* Cancel File daemon */
+   if (jcr->file_bsock) {
+      BSOCK *fd_sock;
+
+      /* Connect through the UA's own JCR, pointed at the job's client */
+      ua->jcr->client = jcr->client;
+      if (!connect_to_file_daemon(ua->jcr, 10, FDConnectTimeout, 1)) {
+         bsendmsg(ua, _("Failed to connect to File daemon.\n"));
+         return 0;
+      }
+      Dmsg0(200, "Connected to file daemon\n");
+      fd_sock = ua->jcr->file_bsock;
+      bnet_fsend(fd_sock, "cancel Job=%s\n", jcr->Job);
+      /* Relay everything the daemon answers back to the UA */
+      while (bnet_recv(fd_sock) >= 0) {
+         bsendmsg(ua, "%s", fd_sock->msg);
+      }
+      bnet_sig(fd_sock, BNET_TERMINATE);
+      bnet_close(fd_sock);
+      ua->jcr->file_bsock = NULL;
+   }
+
+   /* Cancel Storage daemon */
+   if (jcr->store_bsock) {
+      BSOCK *sd_sock;
+
+      /* Connect through the UA's own JCR, pointed at the job's storage */
+      ua->jcr->store = jcr->store;
+      if (!connect_to_storage_daemon(ua->jcr, 10, SDConnectTimeout, 1)) {
+         bsendmsg(ua, _("Failed to connect to Storage daemon.\n"));
+         return 0;
+      }
+      Dmsg0(200, "Connected to storage daemon\n");
+      sd_sock = ua->jcr->store_bsock;
+      bnet_fsend(sd_sock, "cancel Job=%s\n", jcr->Job);
+      /* Relay everything the daemon answers back to the UA */
+      while (bnet_recv(sd_sock) >= 0) {
+         bsendmsg(ua, "%s", sd_sock->msg);
+      }
+      bnet_sig(sd_sock, BNET_TERMINATE);
+      bnet_close(sd_sock);
+      ua->jcr->store_bsock = NULL;
+   }
+
+   return 1;
+}
+
+
+/*
+ * Watchdog destructor: releases the control JCR that was stored in
+ * the watchdog's data pointer.
+ */
+static void job_monitor_destructor(watchdog_t *self)
+{
+   JCR *monitor_jcr;
+
+   monitor_jcr = (JCR *)self->data;
+   free_jcr(monitor_jcr);
+}
+
+/*
+ * Watchdog callback: walk the JCR chain (under the chain lock) and cancel
+ * every job for which the MaxWaitTime or the MaxRunTime check asks for
+ * cancellation.  JCRs with JobId 0 are skipped.  self->data supplies the
+ * control JCR that is handed to the checks and to cancel_job().
+ */
+static void job_monitor_watchdog(watchdog_t *self)
+{
+   JCR *control_jcr = (JCR *)self->data;
+   JCR *jcr;
+
+   Dmsg1(400, "job_monitor_watchdog %p called\n", self);
+
+   lock_jcr_chain();
+
+   foreach_jcr(jcr) {
+      if (jcr->JobId == 0) {
+         Dmsg2(400, "Skipping JCR %p (%s) with JobId 0\n",
+               jcr, jcr->Job);
+      } else {
+         /* Both checks are always evaluated, MaxWaitTime first */
+         bool wait_expired = job_check_maxwaittime(control_jcr, jcr);
+         bool run_expired = job_check_maxruntime(control_jcr, jcr);
+
+         if (wait_expired || run_expired) {
+            Dmsg3(200, "Cancelling JCR %p jobid %d (%s)\n",
+                  jcr, jcr->JobId, jcr->Job);
+
+            /*
+             * The context is created from the target JCR and then
+             * repointed at the control JCR before cancelling.
+             */
+            UAContext *ua = new_ua_context(jcr);
+            ua->jcr = control_jcr;
+            cancel_job(ua, jcr);
+            free_ua_context(ua);
+
+            Dmsg1(200, "Have cancelled JCR %p\n", jcr);
+         }
+      }
+
+      /* Keep reference counts correct (skipped JCRs included) */
+      free_locked_jcr(jcr);
+   }
+
+   unlock_jcr_chain();
+}
+
+/*
+ * Check if the maxwaittime has expired and it is possible
+ * to cancel the job.
+ *
+ * A MaxWaitTime of 0 disables the check.  Otherwise the job is a
+ * cancel candidate once (watchdog_time - jcr->start_time) has reached
+ * jcr->job->MaxWaitTime AND the job is either in one of the
+ * created/blocked/waiting states listed below, or is JS_Running while
+ * SDJobStatus is JS_WaitMount, JS_WaitMedia or JS_WaitFD.
+ *
+ * control_jcr is not used by this check.
+ *
+ *  Returns: true  if the job should be canceled
+ *           false otherwise (an unhandled status is also reported
+ *                 through Jmsg on the job)
+ */
+static bool job_check_maxwaittime(JCR *control_jcr, JCR *jcr)
+{
+   bool cancel = false;
+
+   if (jcr->job->MaxWaitTime == 0) {
+      return false;
+   }
+   if ((watchdog_time - jcr->start_time) < jcr->job->MaxWaitTime) {
+      /* NOTE(review): MaxWaitTime is printed with %d -- confirm its type */
+      Dmsg3(200, "Job %p (%s) with MaxWaitTime %d not expired\n",
+            jcr, jcr->Job, jcr->job->MaxWaitTime);
+      return false;
+   }
+   Dmsg3(200, "Job %d (%s): MaxWaitTime of %d seconds exceeded, "
+         "checking status\n",
+         jcr->JobId, jcr->Job, jcr->job->MaxWaitTime);
+   switch (jcr->JobStatus) {
+   case JS_Created:
+   case JS_Blocked:
+   case JS_WaitFD:
+   case JS_WaitSD:
+   case JS_WaitStoreRes:
+   case JS_WaitClientRes:
+   case JS_WaitJobRes:
+   case JS_WaitPriority:
+   case JS_WaitMaxJobs:
+   case JS_WaitStartTime:
+      cancel = true;
+      Dmsg0(200, "JCR blocked in #1\n");
+      break;
+   case JS_Running:
+      /* Running in the Director: only cancel if the SD side is waiting */
+      Dmsg0(200, "JCR running, checking SD status\n");
+      switch (jcr->SDJobStatus) {
+      case JS_WaitMount:
+      case JS_WaitMedia:
+      case JS_WaitFD:
+         cancel = true;
+         Dmsg0(200, "JCR blocked in #2\n");
+         break;
+      default:
+         Dmsg0(200, "JCR not blocked in #2\n");
+         break;
+      }
+      break;
+   case JS_Terminated:
+   case JS_ErrorTerminated:
+   case JS_Canceled:
+   case JS_FatalError:
+      Dmsg0(200, "JCR already dead in #3\n");
+      break;
+   default:
+      Jmsg1(jcr, M_ERROR, 0, _("Unhandled job status code %d\n"),
+            jcr->JobStatus);
+      break;
+   }
+   /*
+    * Print the job name (jcr->Job).  jcr->job is the job resource
+    * pointer and must not be handed to a %s conversion.
+    */
+   Dmsg3(200, "MaxWaitTime result: %scancel JCR %p (%s)\n",
+         cancel ? "" : "do not ", jcr, jcr->Job);
+
+   return cancel;
+}
+
+/*
+ * Check if maxruntime has expired and if the job can be
+ * canceled.
+ *
+ * A MaxRunTime of 0 disables the check.  Otherwise the job is a
+ * cancel candidate once (watchdog_time - jcr->start_time) has reached
+ * jcr->job->MaxRunTime AND its status is one of the
+ * created/running/blocked/waiting states listed below.  Jobs that have
+ * already terminated, failed or been canceled are left alone.
+ *
+ * control_jcr is not used by this check.
+ *
+ *  Returns: true  if the job should be canceled
+ *           false otherwise (an unhandled status is also reported
+ *                 through Jmsg on the job)
+ */
+static bool job_check_maxruntime(JCR *control_jcr, JCR *jcr)
+{
+   bool cancel = false;
+
+   if (jcr->job->MaxRunTime == 0) {
+      return false;
+   }
+   if ((watchdog_time - jcr->start_time) < jcr->job->MaxRunTime) {
+      /* NOTE(review): MaxRunTime is printed with %d -- confirm its type */
+      Dmsg3(200, "Job %p (%s) with MaxRunTime %d not expired\n",
+            jcr, jcr->Job, jcr->job->MaxRunTime);
+      return false;
+   }
+
+   switch (jcr->JobStatus) {
+   case JS_Created:
+   case JS_Running:
+   case JS_Blocked:
+   case JS_WaitFD:
+   case JS_WaitSD:
+   case JS_WaitStoreRes:
+   case JS_WaitClientRes:
+   case JS_WaitJobRes:
+   case JS_WaitPriority:
+   case JS_WaitMaxJobs:
+   case JS_WaitStartTime:
+   case JS_Differences:
+      cancel = true;
+      break;
+   case JS_Terminated:
+   case JS_ErrorTerminated:
+   case JS_Canceled:
+   case JS_FatalError:
+      cancel = false;
+      break;
+   default:
+      Jmsg1(jcr, M_ERROR, 0, _("Unhandled job status code %d\n"),
+            jcr->JobStatus);
+      break;
+   }
+
+   /*
+    * Print the job name (jcr->Job).  jcr->job is the job resource
+    * pointer and must not be handed to a %s conversion.
+    */
+   Dmsg3(200, "MaxRunTime result: %scancel JCR %p (%s)\n",
+         cancel ? "" : "do not ", jcr, jcr->Job);
+
+   return cancel;
+}
+
+