2 * Job control and execution for Storage Daemon
10 Copyright (C) 2000-2006 Kern Sibbald
12 This program is free software; you can redistribute it and/or
13 modify it under the terms of the GNU General Public License
14 version 2 as amended with additional clauses defined in the
15 file LICENSE in the main source directory.
17 This program is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 file LICENSE for additional details.
27 /* Imported variables (declared extern here, defined outside this file) */
28 extern uint32_t VolSessionTime;
30 /* Imported functions (declared extern here, defined outside this file) */
31 extern uint32_t newVolSessionId();
32 extern bool do_mac(JCR *jcr);
34 /* Requests from the Director daemon */
/*
 * sscanf() pattern for the Director's Job command as parsed by job_cmd().
 * It holds 13 conversions: 8 integers (%d) and 5 strings (%127s). Each
 * %127s stores at most 127 characters followed by a terminating NUL, so
 * every string destination needs room for at least 128 bytes.
 */
35 static char jobcmd[] = "JobId=%d job=%127s job_name=%127s client_name=%127s "
36 "type=%d level=%d FileSet=%127s NoAttr=%d SpoolAttr=%d FileSetMD5=%127s "
37 "SpoolData=%d WritePartAfterJob=%d PreferMountedVols=%d\n";
40 /* Responses sent to Director daemon */
/*
 * OKjob:   success reply; job_cmd() fills in the volume session id, the
 *          volume session time and the authorization key.
 * BAD_job: failure reply; job_cmd() fills in the rejected command text.
 */
41 static char OKjob[] = "3000 OK Job SDid=%u SDtime=%u Authorization=%s\n";
42 static char BAD_job[] = "3915 Bad Job command: %s\n";
/*
 * NOTE(review): the three query replies below are commented out, yet
 * query_cmd() further down still names OK_query, NO_query and BAD_query.
 * Presumably query_cmd() is itself compiled out -- confirm before
 * re-enabling either side.
 */
43 //static char OK_query[] = "3001 OK query\n";
44 //static char NO_query[] = "3918 Query failed\n";
45 //static char BAD_query[] = "3917 Bad query command: %s\n";
48 * Director requests us to start a job
49 * Basic tasks done here:
50 * - We pick up the JobId to be run from the Director.
51 * - We pick up the device, media, and pool from the Director
52 * - Wait for a connection from the File Daemon (FD)
53 * - Accept commands from the FD (i.e. run the job)
54 * - Return when the connection is terminated or
/*
 * job_cmd -- handle the Director's request to start a job.
 *
 * Parses the command held in jcr->dir_bsock->msg with the jobcmd pattern,
 * copies the parsed fields into the JCR, assigns new volume session
 * identifiers and answers with OKjob, which carries a freshly generated
 * authorization key. A command that does not parse is answered with
 * BAD_job, reported as M_FATAL, and the job status is set to
 * JS_ErrorTerminated.
 *
 * jcr: job control record; its dir_bsock is the Director connection.
 *
 * NOTE(review): the return statements, the braces and the declarations of
 * JobId, ojcr and auth_key are not among the visible lines, so the value
 * returned on each path is not documented here.
 */
57 bool job_cmd(JCR *jcr)
61 BSOCK *dir = jcr->dir_bsock;
62 POOL_MEM job_name, client_name, job, fileset_name, fileset_md5;
63 int JobType, level, spool_attributes, no_attributes, spool_data;
64 int write_part_after_job, PreferMountedVols;
69 * Get JobId and permissions from Director
71 Dmsg1(100, "<dird: %s", dir->msg);
/*
 * The command is accepted only when all 13 conversions of jobcmd succeed.
 * NOTE(review): each string field is limited to 127 characters by the
 * pattern; this assumes every POOL_MEM buffer behind c_str() holds at
 * least 128 bytes -- confirm against the POOL_MEM implementation.
 */
72 if (sscanf(dir->msg, jobcmd, &JobId, job.c_str(), job_name.c_str(),
74 &JobType, &level, fileset_name.c_str(), &no_attributes,
75 &spool_attributes, fileset_md5.c_str(), &spool_data,
76 &write_part_after_job, &PreferMountedVols) != 13) {
/*
 * Copy the raw command first; the copy is what gets echoed in the reply
 * and in the error message. NOTE(review): presumably bnet_fsend()
 * overwrites dir->msg with the outgoing text (the debug line after it
 * prints dir->msg as the sent message) -- confirm.
 */
77 pm_strcpy(jcr->errmsg, dir->msg);
78 bnet_fsend(dir, BAD_job, jcr->errmsg);
79 Dmsg1(100, ">dird: %s", dir->msg);
80 Emsg1(M_FATAL, 0, _("Bad Job Command from Director: %s\n"), jcr->errmsg);
81 set_jcr_job_status(jcr, JS_ErrorTerminated);
85 * Since this job could be rescheduled, we
86 * check to see if we have it already. If so
87 * free the old jcr and use the new one.
/*
 * Look up a JCR already registered under the same full job name; only one
 * that has not been authenticated yet is handled by the branch below.
 */
89 ojcr = get_jcr_by_full_name(job.c_str());
90 if (ojcr && !ojcr->authenticated) {
/*
 * NOTE(review): the pointer is narrowed through long to unsigned to match
 * %x, so the printed address may be truncated on platforms where pointers
 * are wider than unsigned int. Debug output only.
 */
91 Dmsg2(100, "Found ojcr=0x%x Job %s\n", (unsigned)(long)ojcr, job.c_str());
/* Give this job its own volume session id and the shared session time. */
95 jcr->VolSessionId = newVolSessionId();
96 jcr->VolSessionTime = VolSessionTime;
/*
 * Copy the parsed fields into the JCR. The three names have
 * unbash_spaces() applied before being copied into newly obtained pool
 * memory. NOTE(review): the POOL_MEM objects are passed directly here
 * (no c_str()), which relies on conversions or overloads declared outside
 * this view -- confirm.
 */
97 bstrncpy(jcr->Job, job, sizeof(jcr->Job));
98 unbash_spaces(job_name);
99 jcr->job_name = get_pool_memory(PM_NAME);
100 pm_strcpy(jcr->job_name, job_name);
101 unbash_spaces(client_name);
102 jcr->client_name = get_pool_memory(PM_NAME);
103 pm_strcpy(jcr->client_name, client_name);
104 unbash_spaces(fileset_name);
105 jcr->fileset_name = get_pool_memory(PM_NAME);
106 pm_strcpy(jcr->fileset_name, fileset_name);
107 jcr->JobType = JobType;
108 jcr->JobLevel = level;
109 jcr->no_attributes = no_attributes;
110 jcr->spool_attributes = spool_attributes;
111 jcr->spool_data = spool_data;
112 jcr->write_part_after_job = write_part_after_job;
113 jcr->fileset_md5 = get_pool_memory(PM_NAME);
114 pm_strcpy(jcr->fileset_md5, fileset_md5);
115 jcr->PreferMountedVols = PreferMountedVols;
/*
 * Start out unauthenticated; handle_filed_connection() sets the flag once
 * authenticate_filed() succeeds, and run_cmd() sets it for job types that
 * do not need the File daemon.
 */
117 jcr->authenticated = false;
120 * Pass back an authorization key for the File daemon
/*
 * auth_key is a local buffer declared outside the visible lines (sizeof
 * is taken of it below). The key is sent to the Director in the OKjob
 * reply.
 */
122 make_session_key(auth_key, NULL, 1);
123 bnet_fsend(dir, OKjob, jcr->VolSessionId, jcr->VolSessionTime, auth_key);
124 Dmsg1(100, ">dird: %s", dir->msg);
/*
 * Keep a heap copy in the JCR (run_cmd() zeroes it later), then clear the
 * local buffer so the key does not linger in it.
 */
125 jcr->sd_auth_key = bstrdup(auth_key);
126 memset(auth_key, 0, sizeof(auth_key));
127 generate_daemon_event(jcr, "JobStart");
/*
 * run_cmd -- wait for the File daemon to connect, then run the job.
 *
 * Sets the job status to JS_WaitFD, reports it to the Director and waits
 * on jcr->job_start_wait with an absolute deadline 30 minutes from now.
 * Afterwards the stored authorization key is zeroed, and run_job() is
 * called only when the job is authenticated and has not been canceled.
 * Job types that do not need the File daemon are marked authenticated
 * directly in the switch.
 *
 * jcr: job control record prepared earlier by job_cmd().
 *
 * NOTE(review): the case labels of the switch, the declarations of tv, tz
 * and errstat, the body of the wait loop and the return statements are
 * not among the visible lines.
 */
131 bool run_cmd(JCR *jcr)
135 struct timespec timeout;
138 Dmsg1(200, "Run_cmd: %s\n", jcr->dir_bsock->msg);
139 /* The following jobs don't need the FD */
140 switch (jcr->JobType) {
/* No File daemon handshake for these job types: mark the job authenticated. */
144 jcr->authenticated = true;
149 set_jcr_job_status(jcr, JS_WaitFD); /* wait for FD to connect */
150 dir_send_job_status(jcr);
/*
 * pthread_cond_timedwait() takes an absolute time, so the deadline is
 * built from the current time: microseconds are converted to nanoseconds
 * and 30 minutes are added to the seconds.
 */
152 gettimeofday(&tv, &tz);
153 timeout.tv_nsec = tv.tv_usec * 1000;
154 timeout.tv_sec = tv.tv_sec + 30 * 60; /* wait 30 minutes */
156 Dmsg1(100, "%s waiting on FD to contact SD\n", jcr->Job);
158 * Wait for the File daemon to contact us to start the Job,
159 * when he does, we will be released, unless the 30 minutes
/*
 * Keep waiting until the job is canceled. A return of 0 (signaled) or
 * ETIMEDOUT (deadline reached) is singled out by the test below; what
 * that branch does is not visible here.
 * NOTE(review): pthread_cond_timedwait() requires jcr->mutex to be locked
 * by the calling thread; the lock and unlock calls are not among the
 * visible lines -- confirm they bracket this loop.
 */
163 for ( ;!job_canceled(jcr); ) {
164 errstat = pthread_cond_timedwait(&jcr->job_start_wait, &jcr->mutex, &timeout);
165 if (errstat == 0 || errstat == ETIMEDOUT) {
/*
 * Overwrite the authorization key in place with zeroes.
 * NOTE(review): assumes jcr->sd_auth_key is non-NULL (job_cmd() assigns
 * it); strlen() on a NULL pointer is undefined.
 */
171 memset(jcr->sd_auth_key, 0, strlen(jcr->sd_auth_key));
/* Run only when the File daemon authenticated and nobody canceled the job. */
173 if (jcr->authenticated && !job_canceled(jcr)) {
174 Dmsg1(100, "Running job %s\n", jcr->Job);
175 run_job(jcr); /* Run the job */
181 * After receiving a connection (in job.c) if it is
182 * from the File daemon, this routine is called.
/*
 * handle_filed_connection -- attach an incoming File daemon connection to
 * its job and authenticate it.
 *
 * Looks up the JCR by full job name, stores the socket in the JCR, runs
 * authenticate_filed(), records the result in jcr->authenticated and then
 * signals jcr->job_start_wait, the condition run_cmd() waits on. On
 * failure the job status is set to JS_ErrorTerminated before signaling.
 *
 * fd:       socket of the connecting File daemon.
 * job_name: full job name sent by the File daemon, used for the lookup.
 *
 * NOTE(review): the return statements and any release of the JCR
 * reference are not among the visible lines.
 */
184 void handle_filed_connection(BSOCK *fd, char *job_name)
/*
 * Short pause before the lookup. NOTE(review): the reason is not stated
 * here; presumably it gives the job thread time to reach its wait --
 * confirm.
 */
188 bmicrosleep(0, 50000); /* wait 50 millisecs */
/* Unknown job name: report it; no JCR exists to attach the socket to. */
189 if (!(jcr=get_jcr_by_full_name(job_name))) {
190 Jmsg1(NULL, M_FATAL, 0, _("Job name not found: %s\n"), job_name);
191 Dmsg1(100, "Job name not found: %s\n", job_name);
/* Link socket and JCR in both directions. */
195 jcr->file_bsock = fd;
196 jcr->file_bsock->jcr = jcr;
198 Dmsg1(110, "Found Job %s\n", job_name);
/* A job that is already authenticated must not get a second FD connection. */
200 if (jcr->authenticated) {
201 Jmsg2(jcr, M_FATAL, 0, _("Hey!!!! JobId %u Job %s already authenticated.\n"),
202 jcr->JobId, jcr->Job);
208 * Authenticate the File daemon
/*
 * NOTE(review): jcr->authenticated was already tested just above; whether
 * the repeated test here can ever be true depends on the lines not
 * visible between the two checks.
 */
210 if (jcr->authenticated || !authenticate_filed(jcr)) {
211 Dmsg1(100, "Authentication failed Job %s\n", jcr->Job);
212 Jmsg(jcr, M_FATAL, 0, _("Unable to authenticate File daemon\n"));
/* Success: record it so run_cmd() will start the job once woken. */
214 jcr->authenticated = true;
215 Dmsg1(110, "OK Authentication Job %s\n", jcr->Job);
/* Failure: flag the job as terminated in error before waking the waiter. */
219 if (!jcr->authenticated) {
220 set_jcr_job_status(jcr, JS_ErrorTerminated);
222 pthread_cond_signal(&jcr->job_start_wait); /* wake waiting job */
231 * Query Device command from Director
232 * Sends Storage Daemon's information on the device to the
233 * caller (presumably the Director).
234 * This command always returns "true" so that the line is
235 * not closed on an error.
/*
 * query_cmd -- answer the Director's device query.
 *
 * Extracts a device name from dir->msg, then searches the R_DEVICE
 * resources and after that the R_AUTOCHANGER resources for a name matching
 * it. For a match, the device or changer information is sent with
 * dir_update_device() or dir_update_changer(), followed by an OK_query or
 * NO_query reply. If nothing matches, NO_device is sent; if the command
 * does not parse, BAD_query is sent.
 *
 * jcr: job control record; its dir_bsock is the Director connection.
 *
 * NOTE(review): query_device, NO_device, OK_query, NO_query and BAD_query
 * are not defined in the visible lines (the latter three appear only as
 * commented-out definitions near the top of the file), so this function
 * is presumably compiled out -- confirm. The declarations of ok and
 * device, the conditions guarding several statements and the return
 * statements are also not visible.
 */
238 bool query_cmd(JCR *jcr)
240 POOL_MEM dev_name, VolumeName, MediaType, ChangerName;
241 BSOCK *dir = jcr->dir_bsock;
243 AUTOCHANGER *changer;
246 Dmsg1(100, "Query_cmd: %s", dir->msg);
/* ok is true only when exactly one field (the device name) was parsed. */
247 ok = sscanf(dir->msg, query_device, dev_name.c_str()) == 1;
248 Dmsg1(100, "<dird: %s\n", dir->msg);
250 unbash_spaces(dev_name);
/* First pass: device resources. */
251 foreach_res(device, R_DEVICE) {
252 /* Find resource, and make sure we were able to open it */
/*
 * fnmatch() takes the pattern first, so the name sent by the Director is
 * the pattern and the resource name is the string matched against it.
 */
253 if (fnmatch(dev_name.c_str(), device->hdr.name, 0) == 0) {
/* NOTE(review): the condition guarding this initialization is not visible. */
255 device->dev = init_dev(jcr, device);
260 ok = dir_update_device(jcr, device->dev);
262 ok = bnet_fsend(dir, OK_query);
264 bnet_fsend(dir, NO_query);
/* Second pass: autochanger resources, matched the same way. */
269 foreach_res(changer, R_AUTOCHANGER) {
270 /* Find resource, and make sure we were able to open it */
271 if (fnmatch(dev_name.c_str(), changer->hdr.name, 0) == 0) {
272 if (!changer->device || changer->device->size() == 0) {
273 continue; /* no devices */
275 ok = dir_update_changer(jcr, changer);
277 ok = bnet_fsend(dir, OK_query);
279 bnet_fsend(dir, NO_query);
284 /* If we get here, the device/autochanger was not found */
285 unbash_spaces(dir->msg);
286 pm_strcpy(jcr->errmsg, dir->msg);
287 bnet_fsend(dir, NO_device, dev_name.c_str());
288 Dmsg1(100, ">dird: %s\n", dir->msg);
/*
 * Reply for a command that could not be handled: the original command is
 * copied to jcr->errmsg and echoed back in BAD_query.
 */
290 unbash_spaces(dir->msg);
291 pm_strcpy(jcr->errmsg, dir->msg);
292 bnet_fsend(dir, BAD_query, jcr->errmsg);
293 Dmsg1(100, ">dird: %s\n", dir->msg);
303 * Destroy the Job Control Record and associated
304 * resources (sockets).
/*
 * stored_free_jcr -- release the Storage daemon specific parts of a JCR.
 *
 * Closes the File daemon socket, frees the name strings held by the JCR,
 * removes and frees the restore bootstrap file, destroys the
 * job_start_wait condition variable, frees the read DCR and deletes the
 * dirstore list after deleting each entry's device member.
 *
 * jcr: job control record being destroyed.
 *
 * NOTE(review): several lines of this function are not visible, and it
 * may continue past the last visible line, so this list may be
 * incomplete.
 */
306 void stored_free_jcr(JCR *jcr)
/* Close the File daemon connection and forget the pointer. */
308 if (jcr->file_bsock) {
309 bnet_close(jcr->file_bsock);
310 jcr->file_bsock = NULL;
/*
 * NOTE(review): job_name is released with free_pool_memory() while
 * client_name, fileset_name and fileset_md5, which job_cmd() also obtains
 * from get_pool_memory(), are released with free_memory(). Confirm the
 * two calls are interchangeable. Any guard around this call is not
 * visible.
 */
313 free_pool_memory(jcr->job_name);
315 if (jcr->client_name) {
316 free_memory(jcr->client_name);
317 jcr->client_name = NULL;
319 if (jcr->fileset_name) {
320 free_memory(jcr->fileset_name);
322 if (jcr->fileset_md5) {
323 free_memory(jcr->fileset_md5);
/* Delete the bootstrap file from disk, then free the memory holding its path. */
329 if (jcr->RestoreBootstrap) {
330 unlink(jcr->RestoreBootstrap);
331 free_pool_memory(jcr->RestoreBootstrap);
332 jcr->RestoreBootstrap = NULL;
/* Sanity check: the JCR should no longer be linked to a device at this point. */
334 if (jcr->next_dev || jcr->prev_dev) {
335 Emsg0(M_FATAL, 0, _("In free_jcr(), but still attached to device!!!!\n"));
/* The condition run_cmd() waited on is no longer needed. */
337 pthread_cond_destroy(&jcr->job_start_wait);
/* NOTE(review): the condition guarding this release is not visible. */
347 free_dcr(jcr->read_dcr);
348 jcr->read_dcr = NULL;
/*
 * Delete the device member of every dirstore entry, then the list itself.
 * store is declared outside the visible lines.
 */
352 foreach_alist(store, jcr->dirstore) {
353 delete store->device;
356 delete jcr->dirstore;
357 jcr->dirstore = NULL;