3 * Bacula Director -- msgchan.c -- handles the message channel
4 * to the Storage daemon and the File daemon.
6 * Kern Sibbald, August MM
8 * This routine runs as a thread and must be thread reentrant.
10 * Basic tasks done here:
11 * Open a message channel with the Storage daemon
12 * to authenticate ourselves and to pass the JobId.
13 * Create a thread to interact with the Storage daemon
14 * who returns a job status and requests Catalog services, etc.
19 Copyright (C) 2000-2004 Kern Sibbald and John Walker
21 This program is free software; you can redistribute it and/or
22 modify it under the terms of the GNU General Public License as
23 published by the Free Software Foundation; either version 2 of
24 the License, or (at your option) any later version.
26 This program is distributed in the hope that it will be useful,
27 but WITHOUT ANY WARRANTY; without even the implied warranty of
28 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
29 General Public License for more details.
31 You should have received a copy of the GNU General Public
32 License along with this program; if not, write to the Free
33 Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
/*
 * Message templates exchanged with the Storage daemon over the message
 * channel. jobcmd and use_device are the format strings handed to
 * bnet_fsend(); OKjob, Job_start, Job_end and Job_status are used as
 * sscanf() patterns on received messages; OK_device is the reply that
 * response() is asked to match.
 */
41 /* Commands sent to Storage daemon */
42 static char jobcmd[] = "JobId=%d job=%s job_name=%s client_name=%s \
43 type=%d level=%d FileSet=%s NoAttr=%d SpoolAttr=%d FileSetMD5=%s SpoolData=%d";
44 static char use_device[] = "use device=%s media_type=%s pool_name=%s pool_type=%s\n";
46 /* Response from Storage daemon */
/*
 * NOTE(review): a %100s conversion may store 100 characters plus the
 * terminating NUL, so the sscanf() destination needs at least 101 bytes.
 * The declaration of the auth_key buffer is not visible here - confirm.
 */
47 static char OKjob[] = "3000 OK Job SDid=%d SDtime=%d Authorization=%100s\n";
48 static char OK_device[] = "3000 OK use device\n";
50 /* Storage Daemon requests */
/*
 * Each pattern reads the job name with %127s (at most 127 characters plus
 * the NUL). In Job_end the identifier lld is spliced between two string
 * literals, so it is presumably a project macro that expands to a string
 * literal - it is not defined in this view, confirm.
 */
51 static char Job_start[] = "3010 Job %127s start\n";
52 static char Job_end[] =
53 "3099 Job %127s end JobStatus=%d JobFiles=%d JobBytes=%" lld "\n";
54 static char Job_status[] = "3012 Job %127s jobstatus %d\n";
56 /* Forward referenced functions */
/* Thread start routine passed to pthread_create() by start_storage_daemon_message_thread(). */
57 static void *msg_thread(void *arg);
60 * Establish a message channel connection with the Storage daemon
61 * and perform authentication.
/*
 * connect_to_storage_daemon
 *
 * Connects to the Storage daemon described by jcr->store (address and
 * SDport) through bnet_connect(), forwarding retry_interval,
 * max_retry_time and verbose unchanged. The resulting socket is linked
 * to the Storage resource (sd->res) and stored in jcr->store_bsock, after
 * which authenticate_storage_daemon() is called.
 *
 * NOTE(review): the declaration of sd, the check for a failed
 * bnet_connect(), the body of the failed-authentication branch and the
 * return statements are not visible in this view, so the meaning of the
 * int result cannot be stated from here.
 */
63 int connect_to_storage_daemon(JCR *jcr, int retry_interval,
64 int max_retry_time, int verbose)
69 * Open message channel with the Storage daemon
71 Dmsg2(200, "bnet_connect to Storage daemon %s:%d\n", jcr->store->address,
73 sd = bnet_connect(jcr, retry_interval, max_retry_time,
74 _("Storage daemon"), jcr->store->address,
75 NULL, jcr->store->SDport, verbose);
/* Link the socket to its Storage resource and publish it on the JCR. */
79 sd->res = (RES *)jcr->store; /* save pointer to other end */
80 jcr->store_bsock = sd;
82 if (!authenticate_storage_daemon(jcr)) {
89 * Start a job with the Storage daemon
/*
 * start_storage_daemon_job
 *
 * Runs two request/reply exchanges on jcr->store_bsock:
 *
 *  1. Sends jobcmd (JobId, job and client names, type, level, FileSet,
 *     attribute and spooling flags, FileSet MD5) and parses the OKjob
 *     reply into jcr->VolSessionId, jcr->VolSessionTime and an
 *     authorization key, which is duplicated into jcr->sd_auth_key.
 *  2. Sends use_device (device, media type, pool name, pool type) and
 *     checks the reply through response() against OK_device.
 *
 * Side effects visible here: the job, client and fileset resource names
 * are modified in place by bash_spaces() and restored by unbash_spaces()
 * once the command has been sent, and an empty jcr->fileset->MD5 is
 * overwritten with the placeholder text **Dummy**.
 *
 * NOTE(review): the declarations of status, storage and auth_key, the
 * assignment of storage, and every return statement are not visible in
 * this view. OKjob reads the key with %100s, which needs a destination
 * of at least 101 bytes - confirm against the auth_key declaration.
 */
91 int start_storage_daemon_job(JCR *jcr)
97 POOLMEM *device_name, *pool_name, *pool_type, *media_type;
100 sd = jcr->store_bsock;
102 * Now send JobId and permissions, and get back the authorization key.
/*
 * The resource names are altered in place for the duration of the send;
 * presumably bash_spaces() makes each name a single blank-free token for
 * the space separated command - confirm against its definition.
 */
104 bash_spaces(jcr->job->hdr.name);
105 bash_spaces(jcr->client->hdr.name);
106 bash_spaces(jcr->fileset->hdr.name);
/* An empty MD5 would leave the FileSetMD5= field of jobcmd blank, so a placeholder is stored. */
107 if (jcr->fileset->MD5[0] == 0) {
108 strcpy(jcr->fileset->MD5, "**Dummy**");
110 bnet_fsend(sd, jobcmd, jcr->JobId, jcr->Job, jcr->job->hdr.name,
111 jcr->client->hdr.name, jcr->JobType, jcr->JobLevel,
112 jcr->fileset->hdr.name, !jcr->pool->catalog_files,
113 jcr->job->SpoolAttributes, jcr->fileset->MD5, jcr->spool_data);
114 Dmsg1(200, "Jobcmd=%s\n", sd->msg);
/* Undo the in-place edits of the resource names made before the send. */
115 unbash_spaces(jcr->job->hdr.name);
116 unbash_spaces(jcr->client->hdr.name);
117 unbash_spaces(jcr->fileset->hdr.name);
/* Only a positive bget_dirmsg() result is parsed; all three OKjob fields must convert. */
118 if (bget_dirmsg(sd) > 0) {
119 Dmsg1(110, "<stored: %s", sd->msg);
120 if (sscanf(sd->msg, OKjob, &jcr->VolSessionId,
121 &jcr->VolSessionTime, &auth_key) != 3) {
122 Dmsg1(100, "BadJob=%s\n", sd->msg);
123 Jmsg(jcr, M_FATAL, 0, _("Storage daemon rejected Job command: %s\n"), sd->msg);
126 jcr->sd_auth_key = bstrdup(auth_key);
127 Dmsg1(150, "sd_auth_key=%s\n", jcr->sd_auth_key);
130 Jmsg(jcr, M_FATAL, 0, _("<stored: bad response to Job command: %s\n"),
136 * Send use device = xxx media = yyy pool = zzz
/*
 * Unlike the job command above, the four values are first copied into
 * pool memory buffers, so bash_spaces() edits the copies and not the
 * Storage and Pool resources.
 */
138 device_name = get_pool_memory(PM_NAME);
139 pool_name = get_pool_memory(PM_NAME);
140 pool_type = get_pool_memory(PM_NAME);
141 media_type = get_pool_memory(PM_NAME);
142 pm_strcpy(&device_name, storage->dev_name);
143 pm_strcpy(&media_type, storage->media_type);
144 pm_strcpy(&pool_type, jcr->pool->pool_type);
145 pm_strcpy(&pool_name, jcr->pool->hdr.name);
146 bash_spaces(device_name);
147 bash_spaces(media_type);
148 bash_spaces(pool_type);
149 bash_spaces(pool_name);
150 bnet_fsend(sd, use_device, device_name, media_type, pool_name, pool_type);
151 Dmsg1(110, ">stored: %s", sd->msg);
152 status = response(jcr, sd, OK_device, "Use Device", NO_DISPLAY);
/*
 * pool_type is reused as scratch storage for a copy of sd->msg, and that
 * copy is what Jmsg prints. The condition guarding these lines is not
 * visible in this view.
 */
154 pm_strcpy(&pool_type, sd->msg); /* save message */
155 Jmsg(jcr, M_FATAL, 0, _("\n"
156 " Storage daemon didn't accept Device \"%s\" because:\n %s"),
157 device_name, pool_type/* sd->msg */);
/* Release the four pool memory buffers obtained above. */
159 free_memory(device_name);
160 free_memory(media_type);
161 free_memory(pool_name);
162 free_memory(pool_type);
168 * Start a thread to handle Storage daemon messages and
/*
 * start_storage_daemon_message_thread
 *
 * Creates a thread running msg_thread() with jcr as its argument, then
 * loops until that thread has stored its id in jcr->SD_msg_chan.
 *
 * Before the thread is created, jcr->use_count is incremented,
 * jcr->sd_msg_thread_done is cleared and jcr->SD_msg_chan is zeroed.
 * The extra use count is presumably the one released by free_jcr() in
 * msg_thread_cleanup() - confirm.
 *
 * NOTE(review): the declarations of status and thid, the body of the
 * wait loop and the return statement are not visible in this view.
 * jcr->SD_msg_chan receives pthread_self() in msg_thread() but is
 * compared with 0 here, which assumes pthread_t is a scalar type whose
 * valid values are nonzero. The loop also polls a field written by
 * another thread; no locking is visible around it - confirm.
 */
171 int start_storage_daemon_message_thread(JCR *jcr)
177 jcr->use_count++; /* mark in use by msg thread */
178 jcr->sd_msg_thread_done = false;
179 jcr->SD_msg_chan = 0;
/* pthread_create() returns an error number instead of setting errno, hence strerror(status). */
181 if ((status=pthread_create(&thid, NULL, msg_thread, (void *)jcr)) != 0) {
182 Jmsg1(jcr, M_ABORT, 0, _("Cannot create message thread: %s\n"), strerror(status));
184 /* Wait for thread to start */
185 while (jcr->SD_msg_chan == 0) {
/*
 * msg_thread_cleanup
 *
 * Cleanup handler that msg_thread() registers with pthread_cleanup_push()
 * and triggers with pthread_cleanup_pop(1). arg is the JCR pointer that
 * was given to the thread.
 *
 * It ends any open catalog transaction on jcr->db, sets
 * jcr->sd_msg_thread_done, broadcasts on jcr->term_wait (the condition
 * that wait_for_storage_daemon_termination() waits on), zeroes
 * jcr->SD_msg_chan and finally calls free_jcr().
 *
 * NOTE(review): lines of the original function are missing from this
 * view, so it cannot be seen whether the flag update and the broadcast
 * happen with jcr->mutex held - confirm.
 */
191 static void msg_thread_cleanup(void *arg)
193 JCR *jcr = (JCR *)arg;
194 Dmsg0(200, "End msg_thread\n");
195 db_end_transaction(jcr, jcr->db); /* terminate any open transaction */
/* Set the flag before the broadcast so that woken waiters see it as true. */
197 jcr->sd_msg_thread_done = true;
198 pthread_cond_broadcast(&jcr->term_wait); /* wakeup any waiting threads */
199 jcr->SD_msg_chan = 0;
/* Last use of jcr in this thread. */
201 free_jcr(jcr); /* release jcr */
205 * Handle the message channel (i.e. requests from the
207 * Note, we are running in a separate thread.
/*
 * msg_thread
 *
 * Thread body started by start_storage_daemon_message_thread(); arg is
 * the JCR pointer. The thread detaches itself, stores its own id in
 * jcr->SD_msg_chan, registers msg_thread_cleanup() as cleanup handler and
 * then reads messages from jcr->store_bsock for as long as bget_dirmsg()
 * returns a value >= 0.
 *
 * Each message is tried against three sscanf() patterns:
 *   Job_start  - job name only; the action taken is not visible here.
 *   Job_end    - stores the status, file count and byte count in
 *                jcr->SDJobStatus, jcr->SDJobFiles and jcr->SDJobBytes.
 *   Job_status - stores the reported status in jcr->SDJobStatus.
 *
 * After the loop a socket error, as reported by is_bnet_error(), sets
 * jcr->SDJobStatus to JS_ErrorTerminated, and pthread_cleanup_pop(1)
 * runs msg_thread_cleanup().
 *
 * NOTE(review): the declarations of sd, stat, JobStatus, JobFiles and
 * JobBytes, the continuation of the Job_end sscanf() call and the return
 * statement are not visible in this view. The patterns read the name
 * with %127s, which needs 128 bytes; Job has MAX_NAME_LENGTH bytes and
 * that value is not visible here - confirm. The sscanf() calls pass
 * &Job, a pointer to the whole array, where %s expects a pointer to char;
 * the address is the same but the type is not.
 */
209 static void *msg_thread(void *arg)
211 JCR *jcr = (JCR *)arg;
214 char Job[MAX_NAME_LENGTH];
/* Detach so that no other thread has to join this one. */
219 pthread_detach(pthread_self());
/* start_storage_daemon_message_thread() loops until this field is nonzero. */
220 jcr->SD_msg_chan = pthread_self();
221 pthread_cleanup_push(msg_thread_cleanup, arg);
222 Dmsg0(200, "msg_thread\n");
223 sd = jcr->store_bsock;
225 /* Read the Storage daemon's output.
227 Dmsg0(200, "Start msg_thread loop\n");
228 while ((stat=bget_dirmsg(sd)) >= 0) {
229 Dmsg1(200, "<stored: %s", sd->msg);
230 if (sscanf(sd->msg, Job_start, &Job) == 1) {
233 if (sscanf(sd->msg, Job_end, &Job, &JobStatus, &JobFiles,
235 jcr->SDJobStatus = JobStatus; /* termination status */
236 jcr->SDJobFiles = JobFiles;
237 jcr->SDJobBytes = JobBytes;
/* Intermediate status report: only the status field is updated. */
240 if (sscanf(sd->msg, Job_status, &Job, &JobStatus) == 2) {
241 jcr->SDJobStatus = JobStatus; /* current status */
/* The read loop has ended; a socket error is recorded as an error termination. */
245 if (is_bnet_error(sd)) {
246 jcr->SDJobStatus = JS_ErrorTerminated;
/* A nonzero argument makes pthread_cleanup_pop() execute msg_thread_cleanup(). */
248 pthread_cleanup_pop(1);
/*
 * wait_for_storage_daemon_termination
 *
 * Sets the job status to JS_WaitSD and waits until the message thread
 * has set jcr->sd_msg_thread_done. Each pass waits on jcr->term_wait
 * with an absolute timeout of the current time plus 10 seconds, so the
 * flag and the cancel state are re-checked at least every 10 seconds.
 * The loop is left when cancel_count reaches 3; the existing comment
 * gives this as 30 seconds of grace for the Storage daemon after a
 * cancel. In the lines visible here the job status is then set to
 * JS_Terminated.
 *
 * NOTE(review): the declarations of tv and tz, the statement inside the
 * job_canceled() branch (presumably it increments cancel_count), the
 * statement inside the cancel_count test and any locking of jcr->mutex
 * are not visible in this view. pthread_cond_timedwait() requires the
 * mutex to be locked by the calling thread and reads timeout.tv_nsec;
 * neither the lock nor an assignment to tv_nsec is visible - confirm.
 */
252 void wait_for_storage_daemon_termination(JCR *jcr)
254 int cancel_count = 0;
255 /* Now wait for Storage daemon to terminate our message thread */
256 set_jcr_job_status(jcr, JS_WaitSD);
258 while (!jcr->sd_msg_thread_done) {
261 struct timespec timeout;
/* pthread_cond_timedwait() takes an absolute time, so the deadline is built from the current time. */
263 gettimeofday(&tv, &tz);
265 timeout.tv_sec = tv.tv_sec + 10; /* wait 10 seconds */
266 Dmsg0(300, "I'm waiting for message thread termination.\n");
/* The return value is not examined; the loop condition and the cancel check decide what happens next. */
267 pthread_cond_timedwait(&jcr->term_wait, &jcr->mutex, &timeout);
268 if (job_canceled(jcr)) {
271 /* Give SD 30 seconds to clean up after cancel */
272 if (cancel_count == 3) {
277 set_jcr_job_status(jcr, JS_Terminated);