3 * Bacula Director -- msgchan.c -- handles the message channel
4 * to the Storage daemon and the File daemon.
6 * Kern Sibbald, August MM
8 * This routine runs as a thread and must be thread reentrant.
10 * Basic tasks done here:
11 * Open a message channel with the Storage daemon
12 * to authenticate ourselves and to pass the JobId.
13 * Create a thread to interact with the Storage daemon
14 * who returns a job status and requests Catalog services, etc.
19 Copyright (C) 2000-2003 Kern Sibbald and John Walker
21 This program is free software; you can redistribute it and/or
22 modify it under the terms of the GNU General Public License as
23 published by the Free Software Foundation; either version 2 of
24 the License, or (at your option) any later version.
26 This program is distributed in the hope that it will be useful,
27 but WITHOUT ANY WARRANTY; without even the implied warranty of
28 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
29 General Public License for more details.
31 You should have received a copy of the GNU General Public
32 License along with this program; if not, write to the Free
33 Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
41 /* Commands sent to Storage daemon */
42 static char jobcmd[] = "JobId=%d job=%s job_name=%s client_name=%s \
43 type=%d level=%d FileSet=%s NoAttr=%d SpoolAttr=%d FileSetMD5=%s\n";
44 static char use_device[] = "use device=%s media_type=%s pool_name=%s pool_type=%s\n";
46 /* Response from Storage daemon */
47 static char OKjob[] = "3000 OK Job SDid=%d SDtime=%d Authorization=%100s\n";
48 static char OK_device[] = "3000 OK use device\n";
50 /* Storage Daemon requests */
51 static char Job_start[] = "3010 Job %127s start\n";
52 static char Job_end[] =
53 "3099 Job %127s end JobStatus=%d JobFiles=%d JobBytes=%" lld "\n";
54 static char Job_status[] = "3012 Job %127s jobstatus %d\n";
56 /* Forward referenced functions */
57 static void *msg_thread(void *arg);
60 * Establish a message channel connection with the Storage daemon
61 * and perform authentication.
63 int connect_to_storage_daemon(JCR *jcr, int retry_interval,
64 int max_retry_time, int verbose)
69 * Open message channel with the Storage daemon
71 Dmsg2(200, "bnet_connect to Storage daemon %s:%d\n", jcr->store->address,
73 sd = bnet_connect(jcr, retry_interval, max_retry_time,
74 _("Storage daemon"), jcr->store->address,
75 NULL, jcr->store->SDport, verbose);
79 sd->res = (RES *)jcr->store; /* save pointer to other end */
80 jcr->store_bsock = sd;
82 if (!authenticate_storage_daemon(jcr)) {
89 * Start a job with the Storage daemon
91 int start_storage_daemon_job(JCR *jcr)
97 POOLMEM *device_name, *pool_name, *pool_type, *media_type;
98 int device_name_len, pool_name_len, pool_type_len, media_type_len;
100 storage = jcr->store;
101 sd = jcr->store_bsock;
103 * Now send JobId and permissions, and get back the authorization key.
105 bash_spaces(jcr->job->hdr.name);
106 bash_spaces(jcr->client->hdr.name);
107 bash_spaces(jcr->fileset->hdr.name);
108 if (jcr->fileset->MD5[0] == 0) {
109 strcpy(jcr->fileset->MD5, "**Dummy**");
111 bnet_fsend(sd, jobcmd, jcr->JobId, jcr->Job, jcr->job->hdr.name,
112 jcr->client->hdr.name, jcr->JobType, jcr->JobLevel,
113 jcr->fileset->hdr.name, !jcr->pool->catalog_files,
114 jcr->job->SpoolAttributes, jcr->fileset->MD5);
115 Dmsg1(200, "Jobcmd=%s\n", sd->msg);
116 unbash_spaces(jcr->job->hdr.name);
117 unbash_spaces(jcr->client->hdr.name);
118 unbash_spaces(jcr->fileset->hdr.name);
119 if (bget_dirmsg(sd) > 0) {
120 Dmsg1(110, "<stored: %s", sd->msg);
121 if (sscanf(sd->msg, OKjob, &jcr->VolSessionId,
122 &jcr->VolSessionTime, &auth_key) != 3) {
123 Dmsg1(100, "BadJob=%s\n", sd->msg);
124 Jmsg(jcr, M_FATAL, 0, _("Storage daemon rejected Job command: %s\n"), sd->msg);
127 jcr->sd_auth_key = bstrdup(auth_key);
128 Dmsg1(150, "sd_auth_key=%s\n", jcr->sd_auth_key);
131 Jmsg(jcr, M_FATAL, 0, _("<stored: bad response to Job command: %s\n"),
137 * Send use device = xxx media = yyy pool = zzz
139 device_name_len = strlen(storage->dev_name) + 1;
140 media_type_len = strlen(storage->media_type) + 1;
141 pool_type_len = strlen(jcr->pool->pool_type) + 1;
142 pool_name_len = strlen(jcr->pool->hdr.name) + 1;
143 device_name = get_memory(device_name_len);
144 pool_name = get_memory(pool_name_len);
145 pool_type = get_memory(pool_type_len);
146 media_type = get_memory(media_type_len);
147 memcpy(device_name, storage->dev_name, device_name_len);
148 memcpy(media_type, storage->media_type, media_type_len);
149 memcpy(pool_type, jcr->pool->pool_type, pool_type_len);
150 memcpy(pool_name, jcr->pool->hdr.name, pool_name_len);
151 bash_spaces(device_name);
152 bash_spaces(media_type);
153 bash_spaces(pool_type);
154 bash_spaces(pool_name);
155 sd->msg = check_pool_memory_size(sd->msg, sizeof(device_name) +
156 device_name_len + media_type_len + pool_type_len + pool_name_len);
157 bnet_fsend(sd, use_device, device_name, media_type, pool_name, pool_type);
158 Dmsg1(110, ">stored: %s", sd->msg);
159 status = response(jcr, sd, OK_device, "Use Device", NO_DISPLAY);
161 pm_strcpy(&pool_type, sd->msg); /* save message */
162 Jmsg(jcr, M_FATAL, 0, _("\n"
163 " Storage daemon didn't accept Device \"%s\" because:\n %s"),
164 device_name, pool_type/* sd->msg */);
167 free_memory(device_name);
168 free_memory(media_type);
169 free_memory(pool_name);
170 free_memory(pool_type);
176 * Start a thread to handle Storage daemon messages and
179 int start_storage_daemon_message_thread(JCR *jcr)
185 jcr->use_count++; /* mark in use by msg thread */
186 jcr->sd_msg_thread_done = false;
187 jcr->SD_msg_chan = 0;
189 if ((status=pthread_create(&thid, NULL, msg_thread, (void *)jcr)) != 0) {
190 Jmsg1(jcr, M_ABORT, 0, _("Cannot create message thread: %s\n"), strerror(status));
192 /* Wait for thread to start */
193 while (jcr->SD_msg_chan == 0) {
199 static void msg_thread_cleanup(void *arg)
201 JCR *jcr = (JCR *)arg;
202 Dmsg0(200, "End msg_thread\n");
203 db_end_transaction(jcr, jcr->db); /* terminate any open transaction */
205 jcr->sd_msg_thread_done = true;
206 pthread_cond_broadcast(&jcr->term_wait); /* wakeup any waiting threads */
208 free_jcr(jcr); /* release jcr */
212 * Handle the message channel (i.e. requests from the
214 * Note, we are running in a separate thread.
216 static void *msg_thread(void *arg)
218 JCR *jcr = (JCR *)arg;
221 char Job[MAX_NAME_LENGTH];
226 pthread_detach(pthread_self());
227 jcr->SD_msg_chan = pthread_self();
228 pthread_cleanup_push(msg_thread_cleanup, arg);
229 Dmsg0(200, "msg_thread\n");
230 sd = jcr->store_bsock;
232 /* Read the Storage daemon's output.
234 Dmsg0(200, "Start msg_thread loop\n");
235 while ((stat=bget_dirmsg(sd)) >= 0) {
236 Dmsg1(200, "<stored: %s", sd->msg);
237 if (sscanf(sd->msg, Job_start, &Job) == 1) {
240 if (sscanf(sd->msg, Job_end, &Job, &JobStatus, &JobFiles,
242 jcr->SDJobStatus = JobStatus; /* termination status */
243 jcr->SDJobFiles = JobFiles;
244 jcr->SDJobBytes = JobBytes;
247 if (sscanf(sd->msg, Job_status, &Job, &JobStatus) == 2) {
248 jcr->SDJobStatus = JobStatus; /* current status */
252 if (is_bnet_error(sd)) {
253 jcr->SDJobStatus = JS_ErrorTerminated;
255 pthread_cleanup_pop(1);
259 void wait_for_storage_daemon_termination(JCR *jcr)
261 int cancel_count = 0;
262 /* Now wait for Storage daemon to terminate our message thread */
263 set_jcr_job_status(jcr, JS_WaitSD);
265 while (!jcr->sd_msg_thread_done) {
268 struct timespec timeout;
270 gettimeofday(&tv, &tz);
272 timeout.tv_sec = tv.tv_sec + 10; /* wait 10 seconds */
273 Dmsg0(300, "I'm waiting for message thread termination.\n");
274 pthread_cond_timedwait(&jcr->term_wait, &jcr->mutex, &timeout);
275 if (job_canceled(jcr)) {
278 /* Give SD 30 seconds to clean up after cancel */
279 if (cancel_count == 3) {
284 set_jcr_job_status(jcr, JS_Terminated);