2 * Job control and execution for Storage Daemon
10 Copyright (C) 2000-2005 Kern Sibbald
12 This program is free software; you can redistribute it and/or
13 modify it under the terms of the GNU General Public License
14 version 2 as amended with additional clauses defined in the
15 file LICENSE in the main source directory.
17 This program is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
20 the file LICENSE for additional details.
27 /* Imported variables */
28 extern uint32_t VolSessionTime;
30 /* Imported functions */
31 extern uint32_t newVolSessionId();
33 /* Requests from the Director daemon */
/* sscanf() template used by job_cmd() to parse the Director's Job command.
 * It contains 13 conversions; every string field is limited to 127
 * characters, so each destination buffer needs room for 128 bytes. */
34 static char jobcmd[] = "JobId=%d job=%127s job_name=%127s client_name=%127s "
35 "type=%d level=%d FileSet=%127s NoAttr=%d SpoolAttr=%d FileSetMD5=%127s "
36 "SpoolData=%d WritePartAfterJob=%d PreferMountedVols=%d\n";
39 /* Responses sent to Director daemon */
40 static char OKjob[] = "3000 OK Job SDid=%u SDtime=%u Authorization=%s\n"; /* sent by job_cmd() with VolSessionId, VolSessionTime and the authorization key */
41 static char BAD_job[] = "3915 Bad Job command: %s\n"; /* sent by job_cmd() when the command does not parse; %s is the received text */
42 //static char OK_query[] = "3001 OK query\n";
43 //static char NO_query[] = "3918 Query failed\n";
44 //static char BAD_query[] = "3917 Bad query command: %s\n";
47 * Director requests us to start a job
48 * Basic tasks done here:
49 * - We pick up the JobId to be run from the Director.
50 * - We pick up the device, media, and pool from the Director
51 * - Wait for a connection from the File Daemon (FD)
52 * - Accept commands from the FD (i.e. run the job)
53 * - Return when the connection is terminated or
/*
 * job_cmd -- handle the Job command sent by the Director.
 *
 *   jcr   job control record; the command text is read from
 *         jcr->dir_bsock->msg and the parsed fields are stored in jcr.
 *
 * The command is scanned with the jobcmd template. If fewer than 13
 * fields convert, BAD_job is sent back and the job status is set to
 * JS_ErrorTerminated. On the success path the JCR receives a new
 * VolSessionId, copies of the job, client and FileSet names, the numeric
 * job options and a new authorization key, and OKjob is sent to the
 * Director.
 */
56 bool job_cmd(JCR *jcr)
60 BSOCK *dir = jcr->dir_bsock; /* connection to the Director */
61 POOL_MEM job_name, client_name, job, fileset_name, fileset_md5; /* destinations for the %127s fields */
62 int JobType, level, spool_attributes, no_attributes, spool_data; /* destinations for the %d fields */
63 int write_part_after_job, PreferMountedVols;
68 * Get JobId and permissions from Director
70 Dmsg1(100, "<dird: %s", dir->msg);
71 if (sscanf(dir->msg, jobcmd, &JobId, job.c_str(), job_name.c_str(),
73 &JobType, &level, fileset_name.c_str(), &no_attributes,
74 &spool_attributes, fileset_md5.c_str(), &spool_data,
75 &write_part_after_job, &PreferMountedVols) != 13) { /* NOTE(review): each c_str() buffer must hold at least 128 bytes for %127s -- confirm POOL_MEM's initial size */
76 pm_strcpy(jcr->errmsg, dir->msg); /* keep a copy of the rejected command for the reply and the log */
77 bnet_fsend(dir, BAD_job, jcr->errmsg); /* tell the Director the command was not understood */
78 Dmsg1(100, ">dird: %s", dir->msg);
79 Emsg1(M_FATAL, 0, _("Bad Job Command from Director: %s\n"), jcr->errmsg);
80 set_jcr_job_status(jcr, JS_ErrorTerminated);
84 * Since this job could be rescheduled, we
85 * check to see if we have it already. If so
86 * free the old jcr and use the new one.
88 ojcr = get_jcr_by_full_name(job.c_str()); /* look for an existing JCR with the same job name */
89 if (ojcr && !ojcr->authenticated) { /* only an old JCR that has not been authenticated is considered */
90 Dmsg2(100, "Found ojcr=0x%x Job %s\n", (unsigned)(long)ojcr, job.c_str()); /* NOTE(review): the cast to unsigned drops the upper pointer bits where long is wider than unsigned */
94 jcr->VolSessionId = newVolSessionId();
95 jcr->VolSessionTime = VolSessionTime;
96 bstrncpy(jcr->Job, job, sizeof(jcr->Job)); /* copy is bounded by the size of jcr->Job */
97 unbash_spaces(job_name); /* presumably restores spaces that were encoded for transmission -- confirm */
98 jcr->job_name = get_pool_memory(PM_NAME);
99 pm_strcpy(jcr->job_name, job_name);
100 unbash_spaces(client_name);
101 jcr->client_name = get_pool_memory(PM_NAME);
102 pm_strcpy(jcr->client_name, client_name);
103 unbash_spaces(fileset_name);
104 jcr->fileset_name = get_pool_memory(PM_NAME);
105 pm_strcpy(jcr->fileset_name, fileset_name);
106 jcr->JobType = JobType;
107 jcr->JobLevel = level;
108 jcr->no_attributes = no_attributes;
109 jcr->spool_attributes = spool_attributes;
110 jcr->spool_data = spool_data;
111 jcr->write_part_after_job = write_part_after_job;
112 jcr->fileset_md5 = get_pool_memory(PM_NAME);
113 pm_strcpy(jcr->fileset_md5, fileset_md5);
114 jcr->PreferMountedVols = PreferMountedVols;
116 jcr->authenticated = false; /* set to true later by run_cmd() or handle_filed_connection() */
119 * Pass back an authorization key for the File daemon
121 make_session_key(auth_key, NULL, 1); /* fill auth_key with a new key */
122 bnet_fsend(dir, OKjob, jcr->VolSessionId, jcr->VolSessionTime, auth_key);
123 Dmsg1(100, ">dird: %s", dir->msg);
124 jcr->sd_auth_key = bstrdup(auth_key); /* the JCR keeps its own copy of the key */
125 memset(auth_key, 0, sizeof(auth_key)); /* overwrite the local copy of the key with zeros */
126 generate_daemon_event(jcr, "JobStart");
/*
 * run_cmd -- wait for the job to be released, then run it.
 *
 *   jcr   job control record of the job to run.
 *
 * Some job types are marked authenticated immediately because they do not
 * need a File daemon. The job status is set to JS_WaitFD and reported to
 * the Director, then the thread waits on jcr->job_start_wait with a
 * deadline 30 minutes ahead. After the wait the stored authorization key
 * is overwritten, and run_job() is called only if the job is
 * authenticated and has not been canceled.
 */
130 bool run_cmd(JCR *jcr)
134 struct timespec timeout; /* absolute deadline passed to pthread_cond_timedwait() */
137 Dmsg1(200, "Run_cmd: %s\n", jcr->dir_bsock->msg);
138 /* The following jobs don't need the FD */
139 switch (jcr->JobType) {
143 jcr->authenticated = true; /* no File daemon authentication is required for these job types */
148 set_jcr_job_status(jcr, JS_WaitFD); /* wait for FD to connect */
149 dir_send_job_status(jcr); /* report the new status to the Director */
151 gettimeofday(&tv, &tz);
152 timeout.tv_nsec = tv.tv_usec * 1000; /* microseconds to nanoseconds */
153 timeout.tv_sec = tv.tv_sec + 30 * 60; /* wait 30 minutes */
155 Dmsg1(100, "%s waiting on FD to contact SD\n", jcr->Job);
157 * Wait for the File daemon to contact us to start the Job,
158 * when he does, we will be released, unless the 30 minutes
162 for ( ;!job_canceled(jcr); ) { /* keep waiting until the job is canceled */
163 errstat = pthread_cond_timedwait(&jcr->job_start_wait, &jcr->mutex, &timeout); /* NOTE(review): POSIX requires jcr->mutex to be locked by this thread here -- confirm the lock/unlock calls around this loop */
164 if (errstat == 0 || errstat == ETIMEDOUT) { /* condition was signalled, or the deadline passed */
170 memset(jcr->sd_auth_key, 0, strlen(jcr->sd_auth_key)); /* overwrite the key characters with zeros once the wait is over */
172 if (jcr->authenticated && !job_canceled(jcr)) {
173 Dmsg1(100, "Running job %s\n", jcr->Job);
174 run_job(jcr); /* Run the job */
180 * After receiving a connection (in job.c) if it is
181 * from the File daemon, this routine is called.
/*
 * handle_filed_connection -- attach an incoming File daemon connection to
 * its job and authenticate it.
 *
 *   fd         socket of the connecting File daemon.
 *   job_name   full job name used to look up the JCR.
 *
 * The JCR is looked up by name; if none exists a fatal message is logged.
 * The socket is stored in the JCR, the File daemon is authenticated, and
 * jcr->authenticated is set on success. When the JCR is not authenticated
 * the job status is set to JS_ErrorTerminated. jcr->job_start_wait is
 * signalled to wake the waiting job.
 */
183 void handle_filed_connection(BSOCK *fd, char *job_name)
187 bmicrosleep(0, 50000); /* wait 50 millisecs */
188 if (!(jcr=get_jcr_by_full_name(job_name))) { /* no job with this name is known */
189 Jmsg1(NULL, M_FATAL, 0, _("Job name not found: %s\n"), job_name);
190 Dmsg1(100, "Job name not found: %s\n", job_name);
194 jcr->file_bsock = fd; /* the JCR now refers to the File daemon socket ... */
195 jcr->file_bsock->jcr = jcr; /* ... and the socket refers back to the JCR */
197 Dmsg1(110, "Found Job %s\n", job_name);
199 if (jcr->authenticated) { /* a second connection for an already authenticated job is reported as fatal */
200 Jmsg2(jcr, M_FATAL, 0, _("Hey!!!! JobId %u Job %s already authenticated.\n"),
201 jcr->JobId, jcr->Job);
207 * Authenticate the File daemon
209 if (jcr->authenticated || !authenticate_filed(jcr)) { /* an already authenticated JCR is treated like a failed authentication */
210 Dmsg1(100, "Authentication failed Job %s\n", jcr->Job);
211 Jmsg(jcr, M_FATAL, 0, _("Unable to authenticate File daemon\n"));
213 jcr->authenticated = true;
214 Dmsg1(110, "OK Authentication Job %s\n", jcr->Job);
218 if (!jcr->authenticated) {
219 set_jcr_job_status(jcr, JS_ErrorTerminated);
221 pthread_cond_signal(&jcr->job_start_wait); /* wake waiting job */
230 * Query Device command from Director
231 * Sends Storage Daemon's information on the device to the
232 * caller (presumably the Director).
233 * This command always returns "true" so that the line is
234 * not closed on an error.
/*
 * query_cmd -- answer a device query from the Director.
 *
 *   jcr   job control record; the query text is read from
 *         jcr->dir_bsock->msg.
 *
 * The requested name is matched first against the device resources and
 * then against the autochanger resources. A match is reported with
 * dir_update_device() or dir_update_changer() followed by OK_query, or by
 * NO_query. If nothing matches, NO_device is sent; a query that does not
 * parse is answered with BAD_query.
 *
 * NOTE(review): the only definitions of OK_query, NO_query and BAD_query
 * visible in this file are commented out, and query_device and NO_device
 * are not visible at all -- presumably this function is compiled out or
 * the definitions live elsewhere; confirm before enabling it.
 */
237 bool query_cmd(JCR *jcr)
239 POOL_MEM dev_name, VolumeName, MediaType, ChangerName;
240 BSOCK *dir = jcr->dir_bsock; /* connection to the Director */
242 AUTOCHANGER *changer;
245 Dmsg1(100, "Query_cmd: %s", dir->msg);
246 ok = sscanf(dir->msg, query_device, dev_name.c_str()) == 1; /* ok is true only when the device name was extracted */
247 Dmsg1(100, "<dird: %s\n", dir->msg);
249 unbash_spaces(dev_name);
251 foreach_res(device, R_DEVICE) { /* first pass: device resources */
252 /* Find resource, and make sure we were able to open it */
253 if (fnmatch(dev_name.c_str(), device->hdr.name, 0) == 0) { /* the requested name is used as the fnmatch() pattern */
255 device->dev = init_dev(jcr, device);
261 ok = dir_update_device(jcr, device->dev);
263 ok = bnet_fsend(dir, OK_query);
265 bnet_fsend(dir, NO_query);
270 foreach_res(changer, R_AUTOCHANGER) { /* second pass: autochanger resources */
271 /* Find resource, and make sure we were able to open it */
272 if (fnmatch(dev_name.c_str(), changer->hdr.name, 0) == 0) { /* same pattern match as for devices */
274 if (!changer->device || changer->device->size() == 0) {
275 continue; /* no devices */
277 ok = dir_update_changer(jcr, changer);
279 ok = bnet_fsend(dir, OK_query);
281 bnet_fsend(dir, NO_query);
286 /* If we get here, the device/autochanger was not found */
288 unbash_spaces(dir->msg);
289 pm_strcpy(jcr->errmsg, dir->msg); /* keep the query text in jcr->errmsg */
290 bnet_fsend(dir, NO_device, dev_name.c_str());
291 Dmsg1(100, ">dird: %s\n", dir->msg);
293 unbash_spaces(dir->msg);
294 pm_strcpy(jcr->errmsg, dir->msg); /* keep the query text for the BAD_query reply */
295 bnet_fsend(dir, BAD_query, jcr->errmsg);
296 Dmsg1(100, ">dird: %s\n", dir->msg);
306 * Destroy the Job Control Record and associated
307 * resources (sockets).
/*
 * stored_free_jcr -- release the Storage daemon resources held by a JCR.
 *
 *   jcr   job control record being destroyed.
 *
 * Closes the File daemon socket, frees the name and MD5 buffers, removes
 * and frees the restore bootstrap file name, reports a fatal error if the
 * JCR is still linked to a device, destroys the job_start_wait condition
 * variable and frees the read DCR.
 */
309 void stored_free_jcr(JCR *jcr)
311 if (jcr->file_bsock) {
312 bnet_close(jcr->file_bsock); /* close the File daemon connection */
313 jcr->file_bsock = NULL;
316 free_pool_memory(jcr->job_name); /* NOTE(review): no NULL check for job_name is visible here -- confirm */
318 if (jcr->client_name) {
319 free_memory(jcr->client_name); /* NOTE(review): allocated with get_pool_memory() in job_cmd(); confirm free_memory() is the matching release call */
320 jcr->client_name = NULL;
322 if (jcr->fileset_name) {
323 free_memory(jcr->fileset_name); /* NOTE(review): no reset to NULL is visible, unlike client_name */
325 if (jcr->fileset_md5) {
326 free_memory(jcr->fileset_md5); /* NOTE(review): no reset to NULL is visible, unlike client_name */
332 if (jcr->RestoreBootstrap) {
333 unlink(jcr->RestoreBootstrap); /* remove the bootstrap file; the result of unlink() is not checked */
334 free_pool_memory(jcr->RestoreBootstrap);
335 jcr->RestoreBootstrap = NULL;
337 if (jcr->next_dev || jcr->prev_dev) { /* the JCR is still linked to a device */
338 Emsg0(M_FATAL, 0, _("In free_jcr(), but still attached to device!!!!\n"));
340 pthread_cond_destroy(&jcr->job_start_wait); /* the condition variable run_cmd() waits on */
350 free_dcr(jcr->read_dcr);
351 jcr->read_dcr = NULL;