2 * Job control and execution for Storage Daemon
10 Copyright (C) 2000-2006 Kern Sibbald
12 This program is free software; you can redistribute it and/or
13 modify it under the terms of the GNU General Public License
14 version 2 as amended with additional clauses defined in the
15 file LICENSE in the main source directory.
17 This program is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 file LICENSE for additional details.
/* File-scope mutex; run_cmd() passes it to pthread_cond_timedwait() together
 * with jcr->job_start_wait while waiting for the File daemon. */
27 static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
29 /* Imported variables */
30 extern uint32_t VolSessionTime;
32 /* Imported functions */
33 extern uint32_t newVolSessionId();
35 /* Requests from the Director daemon */
/* sscanf() template for the Director's Job command, parsed in job_cmd().
 * Every string field is read with %127s, so each one is capped at 127
 * characters; the remaining fields are plain integers (%d). */
36 static char jobcmd[] = "JobId=%d job=%127s job_name=%127s client_name=%127s "
37 "type=%d level=%d FileSet=%127s NoAttr=%d SpoolAttr=%d FileSetMD5=%127s "
38 "SpoolData=%d WritePartAfterJob=%d PreferMountedVols=%d\n";
41 /* Responses sent to Director daemon */
/* Success reply to the Job command: session id, session time and the
 * authorization key generated in job_cmd(). */
42 static char OKjob[] = "3000 OK Job SDid=%u SDtime=%u Authorization=%s\n";
/* Failure reply: carries the sscanf() result and echoes the received command. */
43 static char BAD_job[] = "3915 Bad Job command. stat=%d CMD: %s\n";
/* NOTE(review): the three query replies below are commented out, yet
 * query_cmd() still references OK_query, NO_query and BAD_query --
 * presumably that function is compiled out; confirm. */
44 //static char OK_query[] = "3001 OK query\n";
45 //static char NO_query[] = "3918 Query failed\n";
46 //static char BAD_query[] = "3917 Bad query command: %s\n";
49 * Director requests us to start a job
50 * Basic tasks done here:
51 * - We pick up the JobId to be run from the Director.
52 * - We pick up the device, media, and pool from the Director
53 * - Wait for a connection from the File Daemon (FD)
54 * - Accept commands from the FD (i.e. run the job)
55 * - Return when the connection is terminated or
/*
 * job_cmd: handle the Director's Job request for this JCR.
 *
 * Scans dir->msg with the jobcmd template, copies the parsed fields into
 * the JCR, assigns a new VolSessionId, and answers with OKjob carrying a
 * freshly generated authorization key. The error path visible below sends
 * BAD_job with the sscanf() result and marks the job JS_ErrorTerminated.
 *
 *   jcr - job control record; jcr->dir_bsock is the Director connection.
 */
58 bool job_cmd(JCR *jcr)
62 BSOCK *dir = jcr->dir_bsock;
63 POOL_MEM job_name, client_name, job, fileset_name, fileset_md5;
64 int JobType, level, spool_attributes, no_attributes, spool_data;
65 int write_part_after_job, PreferMountedVols;
70 * Get JobId and permissions from Director
72 Dmsg1(100, "<dird: %s", dir->msg);
/* stat receives the sscanf() return value, i.e. the number of fields converted. */
73 stat = sscanf(dir->msg, jobcmd, &JobId, job.c_str(), job_name.c_str(),
75 &JobType, &level, fileset_name.c_str(), &no_attributes,
76 &spool_attributes, fileset_md5.c_str(), &spool_data,
77 &write_part_after_job, &PreferMountedVols);
/* Error reply: echo the received command back along with the sscanf() result. */
79 pm_strcpy(jcr->errmsg, dir->msg);
80 bnet_fsend(dir, BAD_job, stat, jcr->errmsg);
81 Dmsg1(100, ">dird: %s", dir->msg);
82 set_jcr_job_status(jcr, JS_ErrorTerminated);
86 * Since this job could be rescheduled, we
87 * check to see if we have it already. If so
88 * free the old jcr and use the new one.
/* Only an existing JCR that is not yet authenticated is treated as a stale copy. */
90 ojcr = get_jcr_by_full_name(job.c_str());
91 if (ojcr && !ojcr->authenticated) {
92 Dmsg2(100, "Found ojcr=0x%x Job %s\n", (unsigned)(long)ojcr, job.c_str());
/* Session identity: a new per-job id plus the imported VolSessionTime. */
96 jcr->VolSessionId = newVolSessionId();
97 jcr->VolSessionTime = VolSessionTime;
98 bstrncpy(jcr->Job, job, sizeof(jcr->Job));
/* Each name is passed through unbash_spaces() (presumably decoding escaped
 * spaces -- confirm) and then copied into pool memory owned by the JCR. */
99 unbash_spaces(job_name);
100 jcr->job_name = get_pool_memory(PM_NAME);
101 pm_strcpy(jcr->job_name, job_name);
102 unbash_spaces(client_name);
103 jcr->client_name = get_pool_memory(PM_NAME);
104 pm_strcpy(jcr->client_name, client_name);
105 unbash_spaces(fileset_name);
106 jcr->fileset_name = get_pool_memory(PM_NAME);
107 pm_strcpy(jcr->fileset_name, fileset_name);
108 jcr->JobType = JobType;
109 jcr->JobLevel = level;
110 jcr->no_attributes = no_attributes;
111 jcr->spool_attributes = spool_attributes;
112 jcr->spool_data = spool_data;
113 jcr->write_part_after_job = write_part_after_job;
114 jcr->fileset_md5 = get_pool_memory(PM_NAME);
115 pm_strcpy(jcr->fileset_md5, fileset_md5);
116 jcr->PreferMountedVols = PreferMountedVols;
/* A new job starts unauthenticated; run_cmd() or handle_filed_connection()
 * sets the flag later. */
118 jcr->authenticated = false;
121 * Pass back an authorization key for the File daemon
123 make_session_key(auth_key, NULL, 1);
124 bnet_fsend(dir, OKjob, jcr->VolSessionId, jcr->VolSessionTime, auth_key);
125 Dmsg1(100, ">dird: %s", dir->msg);
/* Keep a private copy of the key on the JCR, then wipe the local buffer. */
126 jcr->sd_auth_key = bstrdup(auth_key);
127 memset(auth_key, 0, sizeof(auth_key));
128 generate_daemon_event(jcr, "JobStart");
/*
 * run_cmd: handle the Director's run request for this JCR.
 *
 * For job types that do not need the File daemon, the job is marked
 * authenticated directly. Otherwise the job is put in JS_WaitFD and this
 * routine waits on jcr->job_start_wait with an absolute deadline 30 minutes
 * from now. Afterwards the stored authorization key is overwritten with
 * zeros and, if the job is authenticated and not canceled, run_job() is
 * called.
 *
 *   jcr - job control record of the job to run.
 */
132 bool run_cmd(JCR *jcr)
136 struct timespec timeout;
139 Dmsg1(200, "Run_cmd: %s\n", jcr->dir_bsock->msg);
140 /* The following jobs don't need the FD */
141 switch (jcr->JobType) {
145 jcr->authenticated = true;
150 set_jcr_job_status(jcr, JS_WaitFD); /* wait for FD to connect */
151 dir_send_job_status(jcr);
/* Build the absolute deadline for pthread_cond_timedwait(): current time
 * (microseconds converted to nanoseconds) plus 30 minutes. */
153 gettimeofday(&tv, &tz);
154 timeout.tv_nsec = tv.tv_usec * 1000;
155 timeout.tv_sec = tv.tv_sec + 30 * 60; /* wait 30 minutes */
157 Dmsg1(100, "%s waiting on FD to contact SD\n", jcr->Job);
159 * Wait for the File daemon to contact us to start the Job,
160 * when he does, we will be released, unless the 30 minutes
/* Keep waiting while the job is not canceled; a result of 0 (signaled) or
 * ETIMEDOUT (deadline passed) is handled by the test below. */
164 for ( ;!job_canceled(jcr); ) {
165 errstat = pthread_cond_timedwait(&jcr->job_start_wait, &mutex, &timeout);
166 if (errstat == 0 || errstat == ETIMEDOUT) {
/* Overwrite the authorization key in place, over its current string length. */
172 memset(jcr->sd_auth_key, 0, strlen(jcr->sd_auth_key));
174 if (jcr->authenticated && !job_canceled(jcr)) {
175 Dmsg1(100, "Running job %s\n", jcr->Job);
176 run_job(jcr); /* Run the job */
182 * After receiving a connection (in job.c) if it is
183 * from the File daemon, this routine is called.
/*
 * handle_filed_connection: attach an incoming File daemon socket to its job.
 *
 *   fd       - socket of the connecting File daemon.
 *   job_name - full job name used to look up the JCR.
 *
 * After a 50 ms pause the JCR is looked up by name; if none exists a fatal
 * message is logged. Otherwise the socket is stored in jcr->file_bsock, the
 * File daemon is authenticated, and pthread_cond_signal() is issued on
 * jcr->job_start_wait to wake the job waiting in run_cmd().
 */
185 void handle_filed_connection(BSOCK *fd, char *job_name)
/* NOTE(review): the reason for this pause is not visible here -- presumably it
 * gives the Director-side job setup time to complete; confirm. */
189 bmicrosleep(0, 50000); /* wait 50 millisecs */
190 if (!(jcr=get_jcr_by_full_name(job_name))) {
191 Jmsg1(NULL, M_FATAL, 0, _("Job name not found: %s\n"), job_name);
192 Dmsg1(100, "Job name not found: %s\n", job_name);
/* Link socket and JCR to each other. */
196 jcr->file_bsock = fd;
197 jcr->file_bsock->jcr = jcr;
199 Dmsg1(110, "Found Job %s\n", job_name);
/* A job that is already authenticated receiving another FD connection is
 * reported as fatal. */
201 if (jcr->authenticated) {
202 Jmsg2(jcr, M_FATAL, 0, _("Hey!!!! JobId %u Job %s already authenticated.\n"),
203 jcr->JobId, jcr->Job);
209 * Authenticate the File daemon
/* authenticate_filed() is only attempted when the job is not yet authenticated
 * (short-circuit evaluation of ||). */
211 if (jcr->authenticated || !authenticate_filed(jcr)) {
212 Dmsg1(100, "Authentication failed Job %s\n", jcr->Job);
213 Jmsg(jcr, M_FATAL, 0, _("Unable to authenticate File daemon\n"));
215 jcr->authenticated = true;
216 Dmsg1(110, "OK Authentication Job %s\n", jcr->Job);
/* Without authentication the job is flagged as terminated in error. */
219 if (!jcr->authenticated) {
220 set_jcr_job_status(jcr, JS_ErrorTerminated);
222 pthread_cond_signal(&jcr->job_start_wait); /* wake waiting job */
230 * Query Device command from Director
231 * Sends Storage Daemon's information on the device to the
232 * caller (presumably the Director).
233 * This command always returns "true" so that the line is
234 * not closed on an error.
/*
 * query_cmd: answer the Director's device query.
 *
 * Scans a device name from dir->msg, then matches it with fnmatch() against
 * every R_DEVICE resource and every R_AUTOCHANGER resource. A match is
 * answered with the result of dir_update_device()/dir_update_changer()
 * followed by OK_query or NO_query; when nothing matches, NO_device is sent;
 * the last path visible below replies with BAD_query echoing the received
 * message.
 *
 *   jcr - job control record; jcr->dir_bsock is the Director connection.
 */
237 bool query_cmd(JCR *jcr)
239 POOL_MEM dev_name, VolumeName, MediaType, ChangerName;
240 BSOCK *dir = jcr->dir_bsock;
242 AUTOCHANGER *changer;
245 Dmsg1(100, "Query_cmd: %s", dir->msg);
/* ok is true only when exactly one field (the device name) was scanned. */
246 ok = sscanf(dir->msg, query_device, dev_name.c_str()) == 1;
247 Dmsg1(100, "<dird: %s\n", dir->msg);
249 unbash_spaces(dev_name);
250 foreach_res(device, R_DEVICE) {
251 /* Find resource, and make sure we were able to open it */
/* dev_name is the fnmatch() pattern; the resource name is the string tested. */
252 if (fnmatch(dev_name.c_str(), device->hdr.name, 0) == 0) {
/* NOTE(review): presumably only reached when device->dev is not yet set -- the
 * guarding condition is not visible here; confirm. */
254 device->dev = init_dev(jcr, device);
259 ok = dir_update_device(jcr, device->dev);
261 ok = bnet_fsend(dir, OK_query);
263 bnet_fsend(dir, NO_query);
268 foreach_res(changer, R_AUTOCHANGER) {
269 /* Find resource, and make sure we were able to open it */
270 if (fnmatch(dev_name.c_str(), changer->hdr.name, 0) == 0) {
/* Autochangers with no device list, or an empty one, are skipped. */
271 if (!changer->device || changer->device->size() == 0) {
272 continue; /* no devices */
274 ok = dir_update_changer(jcr, changer);
276 ok = bnet_fsend(dir, OK_query);
278 bnet_fsend(dir, NO_query);
283 /* If we get here, the device/autochanger was not found */
284 unbash_spaces(dir->msg);
285 pm_strcpy(jcr->errmsg, dir->msg);
286 bnet_fsend(dir, NO_device, dev_name.c_str());
287 Dmsg1(100, ">dird: %s\n", dir->msg);
/* Remaining reply path: echo the received message back in BAD_query. */
289 unbash_spaces(dir->msg);
290 pm_strcpy(jcr->errmsg, dir->msg);
291 bnet_fsend(dir, BAD_query, jcr->errmsg);
292 Dmsg1(100, ">dird: %s\n", dir->msg);
302 * Destroy the Job Control Record and associated
303 * resources (sockets).
/*
 * stored_free_jcr: release the Storage-daemon resources held by a JCR.
 *
 * In the lines visible here it closes the File daemon socket, frees the
 * job/client/fileset strings, unlinks and frees the restore bootstrap path,
 * destroys the job_start_wait condition variable, frees the read DCR, and
 * deletes the jcr->dirstore list together with each entry's device member.
 *
 *   jcr - job control record being destroyed.
 *
 * NOTE(review): client_name and RestoreBootstrap are reset to NULL after
 * being freed, while job_name, fileset_name and fileset_md5 are not in the
 * lines shown -- confirm whether that is intentional.
 */
305 void stored_free_jcr(JCR *jcr)
307 if (jcr->file_bsock) {
308 bnet_close(jcr->file_bsock);
309 jcr->file_bsock = NULL;
312 free_pool_memory(jcr->job_name);
314 if (jcr->client_name) {
315 free_memory(jcr->client_name);
316 jcr->client_name = NULL;
318 if (jcr->fileset_name) {
319 free_memory(jcr->fileset_name);
321 if (jcr->fileset_md5) {
322 free_memory(jcr->fileset_md5);
/* Remove the bootstrap file from disk before freeing the path string. */
328 if (jcr->RestoreBootstrap) {
329 unlink(jcr->RestoreBootstrap);
330 free_pool_memory(jcr->RestoreBootstrap);
331 jcr->RestoreBootstrap = NULL;
/* A JCR that still has next_dev/prev_dev links at this point is reported as
 * a fatal error. */
333 if (jcr->next_dev || jcr->prev_dev) {
334 Emsg0(M_FATAL, 0, _("In free_jcr(), but still attached to device!!!!\n"));
336 pthread_cond_destroy(&jcr->job_start_wait);
346 free_dcr(jcr->read_dcr);
347 jcr->read_dcr = NULL;
/* Delete each entry's device member first, then the list itself. */
351 foreach_alist(store, jcr->dirstore) {
352 delete store->device;
355 delete jcr->dirstore;
356 jcr->dirstore = NULL;