2 Bacula® - The Network Backup Solution
4 Copyright (C) 2001-2014 Free Software Foundation Europe e.V.
6 The main author of Bacula is Kern Sibbald, with contributions from many
7 others, a complete list can be found in the file AUTHORS.
9 You may use this file and others of this release according to the
10 license defined in the LICENSE file, which includes the Affero General
11 Public License, v3.0 ("AGPLv3") and some additional permissions and
12 terms pursuant to its AGPLv3 Section 7.
14 Bacula® is a registered trademark of Kern Sibbald.
17 * This file handles accepting Director Commands
19 * Most Director commands are handled here, with the
20 * exception of the Job command and subsequent
21 * subcommands that are handled
24 * N.B. in this file, in general we must use P(dev->mutex) rather
25 * than dev->rLock() so that we can examine the blocked
26 * state rather than blocking ourselves because a Job
27 * thread has the device blocked. In some "safe" cases,
28 * we can do things to a blocked device. CAREFUL!!!!
30 * File daemon commands are handled in fdcmd.c
32 * Written by Kern Sibbald, May MMI
39 /* Exported variables */
41 /* Imported variables (defined in other source files) */
42 extern BSOCK *filed_chan;
43 extern struct s_last_job last_job;
44 extern bool init_done;
46 /* Static variables: fixed message texts used when talking to the Director */
/* Prefix of the reply sent when no entry of cmds[] matches the message */
47 static char derrmsg[] = "3900 Invalid command:";
/* Reply format used by setdebug_cmd() */
48 static char OKsetdebug[] = "3000 OK setdebug=%ld trace=%ld options=%s tags=%s\n";
/* Sent when a monitor Director uses a command whose monitoraccess is 0 */
49 static char invalid_cmd[] = "3997 Invalid command for a Director with Monitor directive enabled.\n";
/* Replies used by get_bootstrap_file() */
50 static char OK_bootstrap[] = "3000 OK bootstrap\n";
51 static char ERROR_bootstrap[] = "3904 Error bootstrap\n";
/* Success reply of client_cmd() */
52 static char OKclient[] = "3000 OK client command\n";
54 /* Imported functions (command handlers and helpers defined elsewhere) */
55 extern void terminate_child();
56 extern bool job_cmd(JCR *jcr);
57 extern bool use_cmd(JCR *jcr);
58 extern bool run_cmd(JCR *jcr);
59 extern bool status_cmd(JCR *sjcr);
60 extern bool qstatus_cmd(JCR *jcr);
61 //extern bool query_cmd(JCR *jcr);
63 /* Forward referenced functions (defined later in this file) */
64 static bool client_cmd(JCR *jcr);
65 static bool storage_cmd(JCR *jcr);
66 static bool label_cmd(JCR *jcr);
67 static bool die_cmd(JCR *jcr);
68 static bool relabel_cmd(JCR *jcr);
69 static bool readlabel_cmd(JCR *jcr);
70 static bool release_cmd(JCR *jcr);
71 static bool setdebug_cmd(JCR *jcr);
72 static bool cancel_cmd(JCR *cjcr);
73 static bool mount_cmd(JCR *jcr);
74 static bool unmount_cmd(JCR *jcr);
75 //static bool action_on_purge_cmd(JCR *jcr);
76 static bool bootstrap_cmd(JCR *jcr);
77 static bool changer_cmd(JCR *sjcr);
/* Shared worker for label_cmd() (relabel=0) and relabel_cmd() (relabel=1) */
78 static bool do_label(JCR *jcr, int relabel);
79 static DCR *find_device(JCR *jcr, POOL_MEM &dev_name,
80 POOLMEM *media_type, int drive);
81 static void read_volume_label(JCR *jcr, DCR *dcr, DEVICE *dev, int Slot);
82 static void label_volume_if_ok(DCR *dcr, char *oldname,
83 char *newname, char *poolname,
84 int Slot, int relabel);
85 static bool try_autoload_device(JCR *jcr, DCR *dcr, int slot, const char *VolName);
86 static void send_dir_busy_message(BSOCK *dir, DEVICE *dev);
88 /* Responses sent to the Director by storage_cmd() */
89 static char BADcmd[] = "2902 Bad %s\n";
90 static char OKstore[] = "2000 OK storage\n";
92 /* Commands received from director that need scanning */
/* Scan format of the "storage" command; used by storage_cmd() */
93 static char storaddr[] = "storage address=%s port=%d ssl=%d Job=%127s Authentication=%127s";
97 bool (*func)(JCR *jcr);
98 bool monitoraccess; /* set if monitors can access this cmd */
102 * The following are the recognized commands from the Director.
/*
 * Dispatch table for messages received from the Director.
 *
 * handle_connection_request() walks the entries in order and runs the
 * handler of the first entry whose keyword is a prefix of the received
 * message (strncmp over the keyword's length).  The third field is
 * monitoraccess: an entry with 0 is refused with invalid_cmd when the
 * connected Director has its monitor flag set.  The walk ends at the
 * entry whose keyword is NULL.
 */
104 static struct s_cmds cmds[] = {
105 {"JobId=", job_cmd, 0}, /* start Job */
106 {"autochanger", changer_cmd, 0},
107 {"bootstrap", bootstrap_cmd, 0},
108 {"cancel", cancel_cmd, 0},
109 {"client", client_cmd, 0}, /* client address */
110 {".die", die_cmd, 0},
111 {"label", label_cmd, 0}, /* label a tape */
112 {"mount", mount_cmd, 0},
113 {"readlabel", readlabel_cmd, 0},
114 {"release", release_cmd, 0},
115 {"relabel", relabel_cmd, 0}, /* relabel a tape */
116 {"setdebug=", setdebug_cmd, 0}, /* set debug level */
117 {"status", status_cmd, 1}, /* allowed for monitors */
118 {".status", qstatus_cmd, 1}, /* allowed for monitors */
119 {"stop", cancel_cmd, 0}, /* shares its handler with "cancel" */
120 {"storage", storage_cmd, 0}, /* get SD addr from Dir */
121 {"unmount", unmount_cmd, 0},
122 {"use storage=", use_cmd, 0},
124 // {"query", query_cmd, 0},
125 {NULL, NULL} /* list terminator */
130 * Connection request. We accept connections either from the
131 * Director or a Client (File daemon).
133 * Note, we are running as a separate thread of the Storage daemon,
134 * and it is because a Director has made a connection with
135 * us on the "Message" channel.
137 * Basic tasks done here:
138 * - Create a JCR record
139 * - If it was from the FD, call handle_filed_connection()
140 * - Authenticate the Director
141 * - We wait for a command
142 * - We execute the command
143 * - We continue or exit depending on the return status
/*
 * Thread entry point for one incoming connection; arg is that
 * connection's BSOCK.
 *
 * The first message received decides the path.  If it matches one of the
 * three "Hello ... Start Job" formats, the connection is handed to
 * handle_filed_connection().  Otherwise a JCR is created for a Director
 * connection, the Director is authenticated, and every following message
 * is matched by keyword prefix against cmds[] and dispatched.  A handler
 * that returns false ends the command loop; an unmatched message is
 * answered with derrmsg followed by the message text.
 *
 * NOTE(review): fd_version and sd_version are written only by the sscanf
 * calls that list them.  Confirm they are initialised before being passed
 * to handle_filed_connection() when the second or third format matches.
 */
145 void *handle_connection_request(void *arg)
147 BSOCK *bs = (BSOCK *)arg;
150 int fd_version, sd_version;
/* Read the first message; it identifies who is connecting */
156 if (bs->recv() <= 0) {
157 Jmsg1(NULL, M_ERROR, 0, _("Connection request from %s failed.\n"), bs->who());
158 bmicrosleep(5, 0); /* make user wait 5 seconds */
164 * Do a sanity check on the message received
166 if (bs->msglen < 25 || bs->msglen > (int)sizeof(name)) {
167 Pmsg1(000, "<filed: %s", bs->msg);
168 Jmsg2(NULL, M_ERROR, 0, _("Invalid connection from %s. Len=%d\n"), bs->who(), bs->msglen);
169 bmicrosleep(5, 0); /* make user wait 5 seconds */
174 Dmsg1(100, "Conn: %s", bs->msg);
178 * See if this is a File daemon connection. If so
/* Three accepted greetings; each sscanf must convert all of its fields */
181 if (sscanf(bs->msg, "Hello Bacula SD: Start Job %127s %d %d", name, &fd_version, &sd_version) == 3 ||
182 sscanf(bs->msg, "Hello FD: Bacula Storage calling Start Job %127s %d", name, &sd_version) == 2 ||
183 sscanf(bs->msg, "Hello Start Job %127s", name) == 1) {
184 Dmsg1(050, "Got a FD connection at %s\n", bstrftimes(tbuf, sizeof(tbuf),
185 (utime_t)time(NULL)));
186 Dmsg1(50, "%s", bs->msg);
187 handle_filed_connection(bs, name, fd_version, sd_version);
192 * This is a connection from the Director, so setup a JCR
194 Dmsg1(050, "Got a DIR connection at %s\n", bstrftimes(tbuf, sizeof(tbuf),
195 (utime_t)time(NULL)));
196 jcr = new_jcr(sizeof(JCR), stored_free_jcr); /* create Job Control Record */
197 jcr->dir_bsock = bs; /* save Director bsock */
198 jcr->dir_bsock->set_jcr(jcr);
199 jcr->dcrs = New(alist(10, not_owned_by_alist));
200 /* Initialize FD start condition variable */
201 int errstat = pthread_cond_init(&jcr->job_start_wait, NULL);
204 Jmsg1(jcr, M_FATAL, 0, _("Unable to init job cond variable: ERR=%s\n"), be.bstrerror(errstat));
208 Dmsg0(1000, "stored in start_job\n");
211 * Authenticate the Director
213 /* We should have: Hello SD: Bacula Director <dirname> calling */
214 if (!authenticate_director(jcr)) {
215 Jmsg(jcr, M_FATAL, 0, _("Unable to authenticate Director\n"));
218 Dmsg0(90, "Message channel init completed.\n");
/* Command loop: one Director message per iteration until a handler asks to quit */
220 for (quit=false; !quit;) {
222 if ((bnet_stat = bs->recv()) <= 0) {
223 break; /* connection terminated */
225 Dmsg1(199, "<dird: %s\n", bs->msg);
226 /* Ensure that device initialization is complete */
/* Look the message up in cmds[] by keyword prefix */
231 for (i=0; cmds[i].cmd; i++) {
232 if (strncmp(cmds[i].cmd, bs->msg, strlen(cmds[i].cmd)) == 0) {
/* A monitor Director may only run entries flagged with monitoraccess */
233 if ((!cmds[i].monitoraccess) && (jcr->director->monitor)) {
234 Dmsg1(100, "Command \"%s\" is invalid.\n", cmds[i].cmd);
235 bs->fsend(invalid_cmd);
236 bs->signal(BNET_EOD);
239 Dmsg1(200, "Do command: %s\n", cmds[i].cmd);
240 if (!cmds[i].func(jcr)) { /* do command */
241 quit = true; /* error, get out */
242 Dmsg1(190, "Command %s requests quit\n", cmds[i].cmd);
244 found = true; /* indicate command found */
248 if (!found) { /* command not found */
250 Mmsg(err_msg, "%s %s\n", derrmsg, bs->msg);
251 bs->fsend(err_msg.c_str());
/* Connection is finished: emit the JobEnd events, flush messages, release plugins */
256 generate_daemon_event(jcr, "JobEnd");
257 generate_plugin_event(jcr, bsdEventJobEnd);
258 dequeue_messages(jcr); /* send any queued messages */
259 bs->signal(BNET_TERMINATE);
260 free_plugins(jcr); /* release instantiated plugins */
267 * Force SD to die, and hopefully dump itself. Turned on only
268 * in development version.
/*
 * Handler for the ".die" command.
 *
 * When the Director's message contains "deadlock" the request is logged
 * as a deadlock request (the local mutex m is presumably used for that;
 * the statements are not visible here).  Otherwise the message is logged
 * and JobId is read through djcr, which the existing comment on that line
 * identifies as a NULL pointer reference.
 */
270 static bool die_cmd(JCR *jcr)
275 BSOCK *dir = jcr->dir_bsock;
276 pthread_mutex_t m=PTHREAD_MUTEX_INITIALIZER;
278 if (strstr(dir->msg, "deadlock")) {
279 Pmsg0(000, "I have been requested to deadlock ...\n");
284 Pmsg1(000, "I have been requested to die ... (%s)\n", dir->msg);
285 a = djcr->JobId; /* ref NULL pointer */
292 * Get address of client from Director
293 * We attempt to connect to the client (an FD or SD) and
/*
 * Handler for the "client" command.
 *
 * Scans the client's address, port and ssl flag from the Director's
 * message, marks the job with sd_calls_client, connects to the client
 * over a new BSOCK and sends it the "Hello FD: Bacula Storage calling
 * Start Job" greeting carrying jcr->Job.  On success the socket is kept in
 * jcr->file_bsock and the result of sending OKclient to the Director is
 * returned.  The failure path sets JS_ErrorTerminated and sends
 * "3902 Bad client cmd".
 *
 * NOTE(review): the address is scanned with a bare "%s" (no field width),
 * unlike the %127s conversions used elsewhere in this file.  Confirm that
 * jcr->client_addr cannot be overrun.
 */
296 static bool client_cmd(JCR *jcr)
298 int client_port; /* client port */
299 int enable_ssl; /* enable ssl */
300 BSOCK *dir = jcr->dir_bsock;
301 BSOCK *cl = new_bsock(); /* client bsock */
303 Dmsg1(100, "ClientCmd: %s", dir->msg);
304 jcr->sd_calls_client = true;
305 if (sscanf(dir->msg, "client address=%s port=%d ssl=%d", &jcr->client_addr, &client_port,
/* dir->msg is copied to jcr->errmsg before it is reported */
307 pm_strcpy(jcr->errmsg, dir->msg);
308 Jmsg(jcr, M_FATAL, 0, _("Bad client command: %s"), jcr->errmsg);
309 Dmsg1(050, "Bad client command: %s", jcr->errmsg);
313 Dmsg3(110, "Connect to client: %s:%d ssl=%d\n", jcr->client_addr, client_port,
315 /* Open command communications with Client */
316 /* Try to connect for 1 hour at 10 second intervals */
/* The retry interval passed is 10; the overall limit is me->ClientConnectTimeout */
317 if (!cl->connect(jcr, 10, (int)me->ClientConnectTimeout, me->heartbeat_interval,
318 _("Client daemon"), jcr->client_addr, NULL, client_port, 1)) {
319 /* destroy() OK because cl is local */
321 Jmsg(jcr, M_FATAL, 0, _("Failed to connect to Client daemon: %s:%d\n"),
322 jcr->client_addr, client_port);
323 Dmsg2(100, "Failed to connect to Client daemon: %s:%d\n",
324 jcr->client_addr, client_port);
327 Dmsg0(110, "SD connection OK to Client.\n");
330 cl->fsend("Hello FD: Bacula Storage calling Start Job %s 1\n", jcr->Job);
331 jcr->file_bsock = cl;
332 jcr->file_bsock->set_jcr(jcr);
333 /* Send OK to Director */
334 return dir->fsend(OKclient);
/* Failure path: flag the job as failed and tell the Director */
337 jcr->setJobStatus(JS_ErrorTerminated);
338 dir->fsend("3902 Bad %s cmd\n", "client");
343 * Get address of storage daemon from Director
/*
 * Handler for the "storage" command.
 *
 * Scans address, port, ssl flag, Job name and authentication key from the
 * Director's message using the storaddr format (five fields required).
 * A non-zero port makes this daemon connect out to the other Storage
 * daemon over a new BSOCK.  Port 0 means the other side called us, and an
 * existing file_bsock/store_bsock connection is reused.  After
 * authenticate_storagedaemon() is called, the socket is moved to
 * jcr->file_bsock, store_bsock is cleared, the job is marked authenticated
 * and the result of sending OKstore to the Director is returned.  The
 * failure path sends BADcmd with "storage".
 *
 * NOTE(review): in the visible lines the new key is stored only inside the
 * "if (jcr->sd_auth_key)" test.  Confirm what happens when no key was set
 * before.
 */
345 static bool storage_cmd(JCR *jcr)
347 int stored_port; /* storage daemon port */
348 int enable_ssl; /* enable ssl to sd */
349 char sd_auth_key[200];
350 BSOCK *dir = jcr->dir_bsock;
351 BSOCK *sd = new_bsock(); /* storage daemon bsock */
352 char Job[MAX_NAME_LENGTH];
354 Dmsg1(050, "StorageCmd: %s", dir->msg);
355 if (sscanf(dir->msg, storaddr, &jcr->stored_addr, &stored_port,
356 &enable_ssl, Job, sd_auth_key) != 5) {
357 pm_strcpy(jcr->errmsg, dir->msg);
358 Jmsg(jcr, M_FATAL, 0, _("Bad storage command: %s"), jcr->errmsg);
359 Pmsg1(010, "Bad storage command: %s", jcr->errmsg);
/* Replace a previously stored authentication key with the one just scanned */
364 if (jcr->sd_auth_key) {
365 bfree_and_null(jcr->sd_auth_key);
366 jcr->sd_auth_key = bstrdup(sd_auth_key);
/* Non-zero port: we initiate the connection */
368 if (stored_port != 0) {
369 Dmsg2(050, "sd_calls=%d sd_client=%d\n", jcr->sd_calls_client,
371 jcr->sd_calls_client = false; /* We are doing the connecting */
372 Dmsg3(050, "Connect to storage and wait: %s:%d ssl=%d\n", jcr->stored_addr, stored_port,
374 /* Open command communications with Storage daemon */
375 /* Try to connect for 1 hour at 10 second intervals */
376 if (!sd->connect(jcr, 10, (int)me->ClientConnectTimeout, me->heartbeat_interval,
377 _("Storage daemon"), jcr->stored_addr, NULL, stored_port, 1)) {
378 /* destroy() OK because sd is local */
380 Jmsg(jcr, M_FATAL, 0, _("Failed to connect to Storage daemon: %s:%d\n"),
381 jcr->stored_addr, stored_port);
382 Dmsg2(010, "Failed to connect to Storage daemon: %s:%d\n",
383 jcr->stored_addr, stored_port);
387 Dmsg0(050, "Connection OK to SD.\n");
389 jcr->store_bsock = sd;
390 } else { /* The storage daemon called us */
391 jcr->sd_calls_client = true;
392 /* We should already have a storage connection! */
393 if (jcr->file_bsock && jcr->store_bsock == NULL) {
394 jcr->store_bsock = jcr->file_bsock;
396 if (jcr->store_bsock == NULL) {
397 Jmsg0(jcr, M_FATAL, 0, _("In storage_cmd port==0, no prior Storage connection.\n"));
398 Pmsg0(010, "In storage_cmd port==0, no prior Storage connection.\n");
403 if (!authenticate_storagedaemon(jcr, Job)) {
407 * We are a client so we read from the socket we just
408 * opened as if we were a FD, so set file_bsock and
409 * clear the store_bsock.
411 jcr->file_bsock = jcr->store_bsock;
412 jcr->store_bsock = NULL;
413 jcr->authenticated = true; /* Dir authentication is sufficient */
414 Dmsg1(050, "=== Storage_cmd authenticated Job=%s with SD.\n", Job);
416 /* Send OK to Director */
417 return dir->fsend(OKstore);
/* Failure path: report the bad command to the Director */
420 Dmsg0(100, "Send storage command failed.\n");
421 dir->fsend(BADcmd, "storage");
427 * Set debug level as requested by the Director
/*
 * Handler for the "setdebug=" command.
 *
 * First tries the long form (level, trace, hangup, options, tags; five
 * fields).  If that does not convert all five, falls back to the short
 * form (level and trace only) and answers "3991 Bad setdebug command"
 * when that also fails or the level is negative.  The trace flag and the
 * option string are then applied with set_trace() and set_debug_flags(),
 * the tags are parsed with debug_parse_tags(), and the result of sending
 * OKsetdebug to the Director is returned.
 *
 * NOTE(review): lvl, trace_flag and hangup are int32_t but are scanned and
 * printed with %ld.  That is only correct if the sscanf/fsend used here
 * treat %ld as a 32-bit value (e.g. a project-specific implementation) --
 * confirm.
 */
430 static bool setdebug_cmd(JCR *jcr)
432 BSOCK *dir = jcr->dir_bsock;
433 int32_t trace_flag, lvl, hangup; /* hangup is ignored right now */
/* Start with empty option and tag strings so the short form leaves them empty */
437 *tags = *options = 0;
439 Dmsg1(10, "setdebug_cmd: %s", dir->msg);
441 if (sscanf(dir->msg, "setdebug=%ld trace=%ld hangup=%ld options=%55s tags=%511s",
442 &lvl, &trace_flag, &hangup, options, tags) != 5)
/* Long form did not match: try the short form */
444 if (sscanf(dir->msg, "setdebug=%ld trace=%ld", &lvl, &trace_flag) != 2 || lvl < 0) {
445 dir->fsend(_("3991 Bad setdebug command: %s\n"), dir->msg);
450 set_trace(trace_flag);
451 set_debug_flags(options);
452 if (!debug_parse_tags(tags, &level)) {
457 return dir->fsend(OKsetdebug, lvl, trace_flag, options, tags);
463 * Be careful, we switch to using the job's JCR! So, using
464 * BSOCKs on that jcr can have two threads in the same code.
/*
 * Handler for the "cancel" and "stop" commands (both map here in cmds[]).
 *
 * cjcr is the JCR of the Director connection issuing the command; the
 * local jcr is the JCR of the job being cancelled, looked up by name with
 * get_jcr_by_full_name().  The target job's status is changed, and then
 * everything that could be blocking its thread is woken: the
 * job_start_wait condition, the file_bsock (marked terminated and timed
 * out), and any mount wait on the job's write and read devices.  Finally
 * TIMEOUT_SIGNAL is sent to the job's thread and the Director is told the
 * job was marked.  Errors are answered with 3903 (scan failure) or 3904
 * (job not found).
 *
 * NOTE(review): only the "cancel Job=" form is scanned in the visible
 * lines, although "stop" is also routed to this handler -- confirm how a
 * "stop" message is meant to be parsed.
 */
466 static bool cancel_cmd(JCR *cjcr)
468 BSOCK *dir = cjcr->dir_bsock;
470 char Job[MAX_NAME_LENGTH];
475 if (sscanf(dir->msg, "cancel Job=%127s", Job) == 1) {
476 status = JS_Canceled;
479 dir->fsend(_("3903 Error scanning cancel command.\n"));
482 if (!(jcr=get_jcr_by_full_name(Job))) {
483 dir->fsend(_("3904 Job %s not found.\n"), Job);
/* Remember the previous status: it decides whether a waiting thread must be signalled */
485 oldStatus = jcr->JobStatus;
486 jcr->setJobStatus(status);
487 Dmsg2(800, "Cancel JobId=%d %p\n", jcr->JobId, jcr);
488 if (!jcr->authenticated && oldStatus == JS_WaitFD) {
489 pthread_cond_signal(&jcr->job_start_wait); /* wake waiting thread */
491 if (jcr->file_bsock) {
492 jcr->file_bsock->set_terminated();
493 jcr->file_bsock->set_timed_out();
494 Dmsg2(800, "Term bsock jid=%d %p\n", jcr->JobId, jcr);
496 /* Still waiting for FD to connect, release it */
497 pthread_cond_signal(&jcr->job_start_wait); /* wake waiting job */
498 Dmsg2(800, "Signal FD connect jid=%d %p\n", jcr->JobId, jcr);
500 /* If thread waiting on mount, wake him */
501 if (jcr->dcr && jcr->dcr->dev && jcr->dcr->dev->waiting_for_mount()) {
502 pthread_cond_broadcast(&jcr->dcr->dev->wait_next_vol);
503 Dmsg1(100, "JobId=%u broadcast wait_device_release\n", (uint32_t)jcr->JobId);
504 pthread_cond_broadcast(&wait_device_release);
/* Same wake-up for the device used for reading */
506 if (jcr->read_dcr && jcr->read_dcr->dev && jcr->read_dcr->dev->waiting_for_mount()) {
507 pthread_cond_broadcast(&jcr->read_dcr->dev->wait_next_vol);
508 Dmsg1(100, "JobId=%u broadcast wait_device_release\n", (uint32_t)jcr->JobId);
509 pthread_cond_broadcast(&wait_device_release);
511 jcr->my_thread_send_signal(TIMEOUT_SIGNAL);
512 dir->fsend(_("3000 JobId=%ld Job=\"%s\" marked to be %s.\n"),
513 jcr->JobId, jcr->Job, reason);
518 dir->signal(BNET_EOD);
526 static bool label_cmd(JCR *jcr)
528 return do_label(jcr, 0);
531 static bool relabel_cmd(JCR *jcr)
533 return do_label(jcr, 1);
/*
 * Common worker for the "label" (relabel == 0) and "relabel"
 * (relabel != 0) commands.
 *
 * Scans the Director's message in the relabel form (seven fields) or the
 * label form (six fields), converts the bashed spaces in the names back,
 * and looks the device up with find_device().  With the device locked,
 * label_volume_if_ok() is called when the device is neither open nor
 * busy, when its lock can be stolen, or when it is neither busy nor
 * blocked; a busy or blocked device is answered with
 * send_dir_busy_message().  A missing device is answered with 3999 and an
 * unparsable command with 3903.  The reply is ended with BNET_EOD.
 *
 * The four name buffers are sized dir->msglen+1, so each can hold the
 * whole message.
 */
536 static bool do_label(JCR *jcr, int relabel)
538 POOLMEM *newname, *oldname, *poolname, *mtype;
540 BSOCK *dir = jcr->dir_bsock;
546 newname = get_memory(dir->msglen+1);
547 oldname = get_memory(dir->msglen+1);
548 poolname = get_memory(dir->msglen+1);
549 mtype = get_memory(dir->msglen+1);
/* Relabel form: carries both the old and the new volume name */
551 if (sscanf(dir->msg, "relabel %127s OldName=%127s NewName=%127s PoolName=%127s "
552 "MediaType=%127s Slot=%d drive=%d",
553 dev_name.c_str(), oldname, newname, poolname, mtype,
554 &slot, &drive) == 7) {
/* Label form: only the new volume name */
559 if (sscanf(dir->msg, "label %127s VolumeName=%127s PoolName=%127s "
560 "MediaType=%127s Slot=%d drive=%d",
561 dev_name.c_str(), newname, poolname, mtype, &slot, &drive) == 6) {
566 unbash_spaces(newname);
567 unbash_spaces(oldname);
568 unbash_spaces(poolname);
569 unbash_spaces(mtype);
570 dcr = find_device(jcr, dev_name, mtype, drive);
573 dev->Lock(); /* Use P to avoid indefinite block */
574 if (!dev->is_open() && !dev->is_busy()) {
575 Dmsg1(400, "Can %slabel. Device is not open\n", relabel?"re":"");
576 label_volume_if_ok(dcr, oldname, newname, poolname, slot, relabel);
578 /* Under certain "safe" conditions, we can steal the lock */
579 } else if (dev->can_steal_lock()) {
580 Dmsg0(400, "Can relabel. can_steal_lock\n");
581 label_volume_if_ok(dcr, oldname, newname, poolname, slot, relabel);
582 } else if (dev->is_busy() || dev->is_blocked()) {
583 send_dir_busy_message(dir, dev);
584 } else { /* device not being used */
585 Dmsg0(400, "Can relabel. device not used\n");
586 label_volume_if_ok(dcr, oldname, newname, poolname, slot, relabel);
591 dir->fsend(_("3999 Device \"%s\" not found or could not be opened.\n"), dev_name.c_str());
594 /* NB dir->msg gets clobbered in bnet_fsend, so save command */
595 pm_strcpy(jcr->errmsg, dir->msg);
596 dir->fsend(_("3903 Error scanning label command: %s\n"), jcr->errmsg);
/* Release the scan buffers allocated at the top */
598 free_memory(oldname);
599 free_memory(newname);
600 free_memory(poolname);
602 dir->signal(BNET_EOD);
607 * Read the tape label and determine if we can safely
608 * label the tape (not a Bacula volume), then label it.
610 * Enter with the mutex set
/*
 * Label (or relabel) the volume in dcr's device after checking what is
 * currently on it, and report the outcome to the Director.
 *
 *   dcr       device control record; the reply goes to dcr->jcr->dir_bsock
 *   oldname   volume name expected on the medium when relabelling
 *   newname   volume name to write
 *   poolname  pool name passed on to write_new_volume_label_to_dev()
 *   slot      slot passed to try_autoload_device()
 *   relabel   1 for a relabel request
 *
 * The device lock is stolen with state BST_WRITING_LABEL for the duration
 * and handed back at the end.  The volume name used for autoloading and
 * for opening the device is oldname for a relabel and newname otherwise.
 * The existing label is read with read_dev_volume_label() and its status
 * selects between refusing (3920, 3921, 3922), writing the new label
 * (3000 on success, 3912 on failure) and the other error replies (3915,
 * 3914, 3913).
 */
612 static void label_volume_if_ok(DCR *dcr, char *oldname,
613 char *newname, char *poolname,
614 int slot, int relabel)
616 BSOCK *dir = dcr->jcr->dir_bsock;
618 DEVICE *dev = dcr->dev;
621 const char *volname = (relabel == 1) ? oldname : newname;
624 steal_device_lock(dev, &hold, BST_WRITING_LABEL);
625 Dmsg1(100, "Stole device %s lock, writing label.\n", dev->print_name());
627 Dmsg0(90, "try_autoload_device - looking for volume_info\n");
628 if (!try_autoload_device(dcr->jcr, dcr, slot, volname)) {
629 goto bail_out; /* error */
632 /* Ensure that the device is open -- autoload_device() closes it */
/* Tapes are opened read/write; other device types use the create mode */
633 if (dev->is_tape()) {
634 mode = OPEN_READ_WRITE;
636 mode = CREATE_READ_WRITE;
640 dev->truncating = true; /* let open() know we will truncate it */
642 /* Set old volume name for open if relabeling */
643 dcr->setVolCatName(volname);
644 if (!dev->open(dcr, mode)) {
645 dir->fsend(_("3910 Unable to open device \"%s\": ERR=%s\n"),
646 dev->print_name(), dev->bstrerror());
650 /* See what we have for a Volume */
651 label_status = read_dev_volume_label(dcr);
653 /* Set new volume name */
654 dcr->setVolCatName(newname);
655 switch(label_status) {
657 case VOL_VERSION_ERROR:
658 case VOL_LABEL_ERROR:
662 "3920 Cannot label Volume because it is already labeled: \"%s\"\n"),
663 dev->VolHdr.VolumeName);
667 /* Relabel request. If oldname matches, continue */
668 if (strcmp(oldname, dev->VolHdr.VolumeName) != 0) {
669 dir->fsend(_("3921 Wrong volume mounted.\n"));
672 if (dev->label_type != B_BACULA_LABEL) {
673 dir->fsend(_("3922 Cannot relabel an ANSI/IBM labeled Volume.\n"));
676 /* Fall through wanted! */
679 if (!write_new_volume_label_to_dev(dcr, newname, poolname,
680 relabel, true /* write dvd now */)) {
681 dir->fsend(_("3912 Failed to label Volume: ERR=%s\n"), dev->bstrerror());
684 bstrncpy(dcr->VolumeName, newname, sizeof(dcr->VolumeName));
685 /* The following 3000 OK label. string is scanned in ua_label.c */
686 dir->fsend("3000 OK label. VolBytes=%s DVD=%d Volume=\"%s\" Device=%s\n",
687 edit_uint64(dev->VolCatInfo.VolCatBytes, ed1),
688 dev->is_dvd()?1:0, newname, dev->print_name());
691 dir->fsend(_("3915 Failed to label Volume: ERR=%s\n"), dev->errmsg);
694 dir->fsend(_("3914 Failed to label Volume (no media): ERR=%s\n"), dev->bstrerror());
697 dir->fsend(_("3913 Cannot label Volume. "
698 "Unknown status %d from read_volume_label()\n"), label_status);
/* Cleanup: the two tests below depend on whether the device is still open */
703 if (dev->is_open() && !dev->has_cap(CAP_ALWAYSOPEN)) {
706 if (!dev->is_open()) {
709 volume_unused(dcr); /* no longer using volume */
710 give_back_device_lock(dev, &hold);
716 * Read the tape label
718 * Enter with the mutex set
/*
 * Re-read the volume label on dcr's device and report it to the Director.
 *
 * The device lock is stolen with state BST_DOING_ACQUIRE and given back
 * before returning.  dcr->VolumeName is emptied and the device's labeled
 * flag cleared so that read_dev_volume_label() actually reads the medium.
 * Depending on the result, either "3001 Mounted Volume" with the name
 * from the volume header or "3902 Cannot mount Volume" with jcr->errmsg
 * is sent.
 */
720 static bool read_label(DCR *dcr)
724 BSOCK *dir = jcr->dir_bsock;
726 DEVICE *dev = dcr->dev;
728 steal_device_lock(dev, &hold, BST_DOING_ACQUIRE);
730 dcr->VolumeName[0] = 0;
731 dev->clear_labeled(); /* force read of label */
732 switch (read_dev_volume_label(dcr)) {
734 dir->fsend(_("3001 Mounted Volume: %s\n"), dev->VolHdr.VolumeName);
738 dir->fsend(_("3902 Cannot mount Volume on Storage Device \"%s\" because:\n%s"),
739 dev->print_name(), jcr->errmsg);
744 give_back_device_lock(dev, &hold);
749 * Searches for device by name, and if found, creates a dcr and
/*
 * Locate the device named by the Director and build a DCR for it.
 *
 *   jcr         job the DCR is created for
 *   devname     device or autochanger name; bashed spaces are converted
 *               back in place
 *   media_type  when non-NULL, the device's media type must match
 *   drive       wanted drive index inside an autochanger; a negative
 *               value accepts any drive
 *
 * Device resources (R_DEVICE) are searched first by name and media type.
 * Then autochanger resources (R_AUTOCHANGER) are searched by name, and
 * each of the changer's devices is tried in turn.  A changer device is
 * skipped when it is not marked autoselect, and accepted when the drive
 * index and media type match.  For the accepted device a DCR is created
 * with new_dcr() and dcr->device is set to the device resource.
 */
752 static DCR *find_device(JCR *jcr, POOL_MEM &devname,
753 POOLMEM *media_type, int drive)
756 AUTOCHANGER *changer;
760 unbash_spaces(devname);
/* Pass 1: plain device resources */
761 foreach_res(device, R_DEVICE) {
762 /* Find resource, and make sure we were able to open it */
763 if (strcmp(device->hdr.name, devname.c_str()) == 0 &&
764 (!media_type || strcmp(device->media_type, media_type) ==0)) {
766 device->dev = init_dev(jcr, device);
769 Jmsg(jcr, M_WARNING, 0, _("\n"
770 " Device \"%s\" requested by DIR could not be opened or does not exist.\n"),
774 Dmsg1(20, "Found device %s\n", device->hdr.name);
/* Pass 2: autochanger resources, trying each device that belongs to the changer */
780 foreach_res(changer, R_AUTOCHANGER) {
781 /* Find resource, and make sure we were able to open it */
782 if (strcmp(devname.c_str(), changer->hdr.name) == 0) {
783 /* Try each device in this AutoChanger */
784 foreach_alist(device, changer->device) {
785 Dmsg1(100, "Try changer device %s\n", device->hdr.name);
787 device->dev = init_dev(jcr, device);
790 Dmsg1(100, "Device %s could not be opened. Skipped\n", devname.c_str());
791 Jmsg(jcr, M_WARNING, 0, _("\n"
792 " Device \"%s\" in changer \"%s\" requested by DIR could not be opened or does not exist.\n"),
793 device->hdr.name, devname.c_str());
796 if (!device->dev->autoselect) {
797 Dmsg1(100, "Device %s not autoselect skipped.\n", devname.c_str());
798 continue; /* device is not available */
/* Accept the device when the drive index (if one was requested) and media type match */
800 if ((drive < 0 || drive == (int)device->dev->drive_index) &&
801 (!media_type || strcmp(device->media_type, media_type) ==0)) {
802 Dmsg1(20, "Found changer device %s\n", device->hdr.name);
806 Dmsg3(100, "Device %s drive wrong: want=%d got=%d skipping\n",
807 devname.c_str(), drive, (int)device->dev->drive_index);
809 break; /* we found it but could not open a device */
815 Dmsg1(100, "Found device %s\n", device->hdr.name);
816 dcr = new_dcr(jcr, NULL, device->dev);
817 dcr->device = device;
824 * Mount command from Director
/*
 * Handler for the "mount" command.
 *
 * Scans the device name with a drive and slot, or with a drive only, and
 * looks the device up with find_device().  With the device locked, the
 * action depends on dev->blocked():
 *   - BST_WAITING_FOR_SYSOP: a job is waiting; the state is set to
 *     BST_MOUNT and the waiters are woken (a given slot is ignored).
 *   - BST_UNMOUNTED_WAITING_FOR_SYSOP and the other unmounted case: the
 *     device is reopened (after an optional autochanger load), the label
 *     is read, and the result is reported.
 *   - BST_DOING_ACQUIRE / BST_WRITING_LABEL: only a status reply is sent.
 *   - BST_NOT_BLOCKED: the device is loaded/opened as needed and its
 *     mount state is reported; tape, unmountable and file devices are
 *     handled separately.
 * Any other state is answered with "3905 Unknown wait state".  A missing
 * device gives 3999 and an unparsable command 3909.  The reply is ended
 * with BNET_EOD.
 */
826 static bool mount_cmd(JCR *jcr)
829 BSOCK *dir = jcr->dir_bsock;
/* Try the form with a slot first, then the form without */
836 ok = sscanf(dir->msg, "mount %127s drive=%d slot=%d", devname.c_str(),
839 ok = sscanf(dir->msg, "mount %127s drive=%d", devname.c_str(), &drive) == 2;
841 Dmsg3(100, "ok=%d drive=%d slot=%d\n", ok, drive, slot);
843 dcr = find_device(jcr, devname, NULL, drive);
846 dev->Lock(); /* Use P to avoid indefinite block */
847 Dmsg2(100, "mount cmd blocked=%d must_unload=%d\n", dev->blocked(),
849 switch (dev->blocked()) { /* device blocked? */
850 case BST_WAITING_FOR_SYSOP:
851 /* Someone is waiting, wake him */
852 Dmsg0(100, "Waiting for mount. Attempting to wake thread\n");
853 dev->set_blocked(BST_MOUNT);
854 dir->fsend("3001 OK mount requested. %sDevice=%s\n",
855 slot>0?_("Specified slot ignored. "):"",
857 Dmsg1(100, "JobId=%u broadcast wait_next_vol\n", (uint32_t)dcr->jcr->JobId);
858 pthread_cond_broadcast(&dev->wait_next_vol);
859 Dmsg1(100, "JobId=%u broadcast wait_device_release\n", (uint32_t)dcr->jcr->JobId);
860 pthread_cond_broadcast(&wait_device_release);
863 /* In both of these two cases, we (the user) unmounted the Volume */
864 case BST_UNMOUNTED_WAITING_FOR_SYSOP:
866 Dmsg2(100, "Unmounted changer=%d slot=%d\n", dev->is_autochanger(), slot);
/* Load the requested slot first when the device is an autochanger */
867 if (dev->is_autochanger() && slot > 0) {
868 try_autoload_device(jcr, dcr, slot, "");
870 /* We freed the device, so reopen it and wake any waiting threads */
871 if (!dev->open(dcr, OPEN_READ_ONLY)) {
872 dir->fsend(_("3901 Unable to open device \"%s\": ERR=%s\n"),
873 dev->print_name(), dev->bstrerror());
874 if (dev->blocked() == BST_UNMOUNTED) {
875 /* We blocked the device, so unblock it */
876 Dmsg0(100, "Unmounted. Unblocking device\n");
881 read_dev_volume_label(dcr);
882 if (dev->blocked() == BST_UNMOUNTED) {
883 /* We blocked the device, so unblock it */
884 Dmsg0(100, "Unmounted. Unblocking device\n");
885 read_label(dcr); /* this should not be necessary */
888 Dmsg0(100, "Unmounted waiting for mount. Attempting to wake thread\n");
889 dev->set_blocked(BST_MOUNT);
891 if (dev->is_labeled()) {
892 dir->fsend(_("3001 Device \"%s\" is mounted with Volume \"%s\"\n"),
893 dev->print_name(), dev->VolHdr.VolumeName);
895 dir->fsend(_("3905 Device \"%s\" open but no Bacula volume is mounted.\n"
896 "If this is not a blank tape, try unmounting and remounting the Volume.\n"),
899 pthread_cond_broadcast(&dev->wait_next_vol);
900 Dmsg1(100, "JobId=%u broadcast wait_device_release\n", (uint32_t)dcr->jcr->JobId);
901 pthread_cond_broadcast(&wait_device_release);
904 case BST_DOING_ACQUIRE:
905 dir->fsend(_("3001 Device \"%s\" is doing acquire.\n"),
909 case BST_WRITING_LABEL:
910 dir->fsend(_("3903 Device \"%s\" is being labeled.\n"),
914 case BST_NOT_BLOCKED:
915 Dmsg2(100, "Not blocked changer=%d slot=%d\n", dev->is_autochanger(), slot);
916 if (dev->is_autochanger() && slot > 0) {
917 try_autoload_device(jcr, dcr, slot, "");
/* Already open: just report what is mounted */
919 if (dev->is_open()) {
920 if (dev->is_labeled()) {
921 dir->fsend(_("3001 Device \"%s\" is mounted with Volume \"%s\"\n"),
922 dev->print_name(), dev->VolHdr.VolumeName);
924 dir->fsend(_("3905 Device \"%s\" open but no Bacula volume is mounted.\n"
925 "If this is not a blank tape, try unmounting and remounting the Volume.\n"),
/* Closed tape device: open it and report what is mounted */
928 } else if (dev->is_tape()) {
929 if (!dev->open(dcr, OPEN_READ_ONLY)) {
930 dir->fsend(_("3901 Unable to open device \"%s\": ERR=%s\n"),
931 dev->print_name(), dev->bstrerror());
935 if (dev->is_labeled()) {
936 dir->fsend(_("3001 Device \"%s\" is already mounted with Volume \"%s\"\n"),
937 dev->print_name(), dev->VolHdr.VolumeName);
939 dir->fsend(_("3905 Device \"%s\" open but no Bacula volume is mounted.\n"
940 "If this is not a blank tape, try unmounting and remounting the Volume.\n"),
943 if (dev->is_open() && !dev->has_cap(CAP_ALWAYSOPEN)) {
946 } else if (dev->is_unmountable()) {
948 dir->fsend(_("3002 Device \"%s\" is mounted.\n"), dev->print_name());
950 dir->fsend(_("3907 %s"), dev->bstrerror());
952 } else { /* must be file */
953 dir->fsend(_("3906 File device \"%s\" is always mounted.\n"),
955 pthread_cond_broadcast(&dev->wait_next_vol);
956 Dmsg1(100, "JobId=%u broadcast wait_device_release\n", (uint32_t)dcr->jcr->JobId);
957 pthread_cond_broadcast(&wait_device_release);
962 dir->fsend(_("3930 Device \"%s\" is being released.\n"), dev->print_name());
966 dir->fsend(_("3905 Unknown wait state %d\n"), dev->blocked());
972 dir->fsend(_("3999 Device \"%s\" not found or could not be opened.\n"), devname.c_str());
/* Scan failure: dir->msg is copied to jcr->errmsg before it is reported */
975 pm_strcpy(jcr->errmsg, dir->msg);
976 dir->fsend(_("3909 Error scanning mount command: %s\n"), jcr->errmsg);
978 dir->signal(BNET_EOD);
983 * unmount command from Director
/*
 * Handler for the "unmount" command.
 *
 * Scans the device name and drive and looks the device up with
 * find_device().  With the device locked:
 *   - not open: the autochanger is unloaded if the device is not busy,
 *     and the device is reported as unmounted (3002) or already
 *     unmounted (3901);
 *   - blocked in BST_WAITING_FOR_SYSOP: the changer is unloaded, the
 *     device unmounted and its state set to
 *     BST_UNMOUNTED_WAITING_FOR_SYSOP;
 *   - blocked in BST_DOING_ACQUIRE or BST_WRITING_LABEL: refused with
 *     3902 / 3903;
 *   - busy: send_dir_busy_message();
 *   - otherwise: the device is marked BST_UNMOUNTED by hand, the changer
 *     unloaded and the device unmounted.
 * A missing device gives 3999 and an unparsable command 3907.  The reply
 * is ended with BNET_EOD.
 */
985 static bool unmount_cmd(JCR *jcr)
988 BSOCK *dir = jcr->dir_bsock;
993 if (sscanf(dir->msg, "unmount %127s drive=%d", devname.c_str(), &drive) == 2) {
994 dcr = find_device(jcr, devname, NULL, drive);
997 dev->Lock(); /* Use P to avoid indefinite block */
998 if (!dev->is_open()) {
999 if (!dev->is_busy()) {
1000 unload_autochanger(dcr, -1);
1002 if (dev->is_unmountable()) {
1003 if (dev->unmount(0)) {
1004 dir->fsend(_("3002 Device \"%s\" unmounted.\n"),
1007 dir->fsend(_("3907 %s"), dev->bstrerror());
1010 Dmsg0(90, "Device already unmounted\n");
1011 dir->fsend(_("3901 Device \"%s\" is already unmounted.\n"),
1014 } else if (dev->blocked() == BST_WAITING_FOR_SYSOP) {
1015 Dmsg2(90, "%d waiter dev_block=%d. doing unmount\n", dev->num_waiting,
1017 if (!unload_autochanger(dcr, -1)) {
1019 * ***FIXME**** what is this ???? -- probably we had
1020 * the wrong volume so we must free it and try again. KES
1025 if (dev->is_unmountable() && !dev->unmount(0)) {
1026 dir->fsend(_("3907 %s"), dev->bstrerror());
1028 dev->set_blocked(BST_UNMOUNTED_WAITING_FOR_SYSOP);
1029 dir->fsend(_("3001 Device \"%s\" unmounted.\n"),
1033 } else if (dev->blocked() == BST_DOING_ACQUIRE) {
1034 dir->fsend(_("3902 Device \"%s\" is busy in acquire.\n"),
1037 } else if (dev->blocked() == BST_WRITING_LABEL) {
1038 dir->fsend(_("3903 Device \"%s\" is being labeled.\n"),
1041 } else if (dev->is_busy()) {
1042 send_dir_busy_message(dir, dev);
1043 } else { /* device not being used */
1044 Dmsg0(90, "Device not in use, unmounting\n");
1045 /* On FreeBSD, I am having ASSERT() failures in block_device()
1046 * and I can only imagine that the thread id that we are
1047 * leaving in no_wait_id is being re-used. So here,
1048 * we simply do it by hand. Gross, but a solution.
1050 /* block_device(dev, BST_UNMOUNTED); replace with 2 lines below */
1051 dev->set_blocked(BST_UNMOUNTED);
1052 clear_thread_id(dev->no_wait_id);
1053 if (!unload_autochanger(dcr, -1)) {
1057 if (dev->is_unmountable() && !dev->unmount(0)) {
1058 dir->fsend(_("3907 %s"), dev->bstrerror());
1060 dir->fsend(_("3002 Device \"%s\" unmounted.\n"),
1067 dir->fsend(_("3999 Device \"%s\" not found or could not be opened.\n"), devname.c_str());
1070 /* NB dir->msg gets clobbered in bnet_fsend, so save command */
1071 pm_strcpy(jcr->errmsg, dir->msg);
1072 dir->fsend(_("3907 Error scanning unmount command: %s\n"), jcr->errmsg);
1074 dir->signal(BNET_EOD);
1080 * The truncate command will recycle a volume. The director can call this
1081 * after purging a volume so that disk space will not be wasted. Only useful
1082 * for File Storage, of course.
1085 * It is currently disabled
1087 static bool action_on_purge_cmd(JCR *jcr)
1089 BSOCK *dir = jcr->dir_bsock;
1091 char devname[MAX_NAME_LENGTH];
1092 char volumename[MAX_NAME_LENGTH];
1095 /* TODO: Need to find a free device and ask for slot to the director */
1096 if (sscanf(dir->msg,
1097 "action_on_purge %127s vol=%127s action=%d",
1098 devname, volumename, &action)!= 5)
1100 dir->fsend(_("3916 Error scanning action_on_purge command\n"));
1103 unbash_spaces(volumename);
1104 unbash_spaces(devname);
1106 /* Check if action is correct */
1107 if (action & AOP_TRUNCTATE) {
1113 dir->signal(BNET_EOD);
1119 * Release command from Director. This rewinds the device and if
1120 * configured does a offline and ensures that Bacula will
1121 * re-read the label of the tape before continuing. This gives
1122 * the operator the chance to change the tape anytime before the
/*
 * Handler for the "release" command.
 *
 * Scans the device name and drive and looks the device up with
 * find_device().  With the device locked:
 *   - not open: the autochanger is unloaded if the device is not busy and
 *     3921 "already released" is sent;
 *   - blocked in BST_WAITING_FOR_SYSOP: the changer is unloaded and 3922
 *     "waiting for sysop" is sent;
 *   - blocked in BST_UNMOUNTED_WAITING_FOR_SYSOP: 3922 "waiting for
 *     mount" is sent;
 *   - blocked in BST_DOING_ACQUIRE or BST_WRITING_LABEL: refused with
 *     3923 / 3914;
 *   - busy: send_dir_busy_message();
 *   - otherwise: dcr->release_volume() is called and 3022 "released" is
 *     sent.
 * A missing device gives 3999 and an unparsable command 3927.  The reply
 * is ended with BNET_EOD.
 */
1125 static bool release_cmd(JCR *jcr)
1128 BSOCK *dir = jcr->dir_bsock;
1133 if (sscanf(dir->msg, "release %127s drive=%d", devname.c_str(), &drive) == 2) {
1134 dcr = find_device(jcr, devname, NULL, drive);
1137 dev->Lock(); /* Use P to avoid indefinite block */
1138 if (!dev->is_open()) {
1139 if (!dev->is_busy()) {
1140 unload_autochanger(dcr, -1);
1142 Dmsg0(90, "Device already released\n");
1143 dir->fsend(_("3921 Device \"%s\" already released.\n"),
1146 } else if (dev->blocked() == BST_WAITING_FOR_SYSOP) {
1147 Dmsg2(90, "%d waiter dev_block=%d.\n", dev->num_waiting,
1149 unload_autochanger(dcr, -1);
1150 dir->fsend(_("3922 Device \"%s\" waiting for sysop.\n"),
1153 } else if (dev->blocked() == BST_UNMOUNTED_WAITING_FOR_SYSOP) {
1154 Dmsg2(90, "%d waiter dev_block=%d. doing unmount\n", dev->num_waiting,
1156 dir->fsend(_("3922 Device \"%s\" waiting for mount.\n"),
1159 } else if (dev->blocked() == BST_DOING_ACQUIRE) {
1160 dir->fsend(_("3923 Device \"%s\" is busy in acquire.\n"),
1163 } else if (dev->blocked() == BST_WRITING_LABEL) {
1164 dir->fsend(_("3914 Device \"%s\" is being labeled.\n"),
1167 } else if (dev->is_busy()) {
1168 send_dir_busy_message(dir, dev);
1169 } else { /* device not being used */
1170 Dmsg0(90, "Device not in use, releasing\n");
1171 dcr->release_volume();
1172 dir->fsend(_("3022 Device \"%s\" released.\n"),
1178 dir->fsend(_("3999 Device \"%s\" not found or could not be opened.\n"), devname.c_str());
1181 /* NB dir->msg gets clobbered in bnet_fsend, so save command */
1182 pm_strcpy(jcr->errmsg, dir->msg);
1183 dir->fsend(_("3927 Error scanning release command: %s\n"), jcr->errmsg);
1185 dir->signal(BNET_EOD);
/*
 * File-scope state used when naming bootstrap files.
 * bsr_uniq is formatted into the bootstrap file name below. bsr_mutex
 * presumably guards updates to it -- the lock, unlock and increment
 * statements are not visible in this listing (TODO confirm).
 */
1189 static pthread_mutex_t bsr_mutex = PTHREAD_MUTEX_INITIALIZER;
1190 static uint32_t bsr_uniq = 0;
/*
 * get_bootstrap_file: receive bootstrap text from sock, write it to a
 * file under me->working_directory, parse it into jcr->bsr and create
 * the restore volume list.
 *
 *  jcr   job control record; RestoreBootstrap and bsr are updated.
 *  sock  socket the bootstrap text is read from and the reply is sent to.
 *
 * Replies are ERROR_bootstrap or OK_bootstrap; the visible return value
 * is the result of sending OK_bootstrap.
 *
 * NOTE(review): lines are missing from this listing (gaps in the embedded
 * numbering). The failure tests, any goto/label pair, the file close and
 * the condition choosing between the two replies are not visible here.
 */
1192 static bool get_bootstrap_file(JCR *jcr, BSOCK *sock)
1194 POOLMEM *fname = get_pool_memory(PM_FNAME);
/* Discard any bootstrap file already attached to this JCR */
1198 if (jcr->RestoreBootstrap) {
1199 unlink(jcr->RestoreBootstrap);
1200 free_pool_memory(jcr->RestoreBootstrap);
/* File name: working_directory/hdr.name.Job.bsr_uniq.bootstrap */
1204 Mmsg(fname, "%s/%s.%s.%d.bootstrap", me->working_directory, me->hdr.name,
1205 jcr->Job, bsr_uniq);
1207 Dmsg1(400, "bootstrap=%s\n", fname);
1208 jcr->RestoreBootstrap = fname; /* the JCR now refers to the fname buffer */
1209 bs = fopen(fname, "a+b"); /* create file */
1212 Jmsg(jcr, M_FATAL, 0, _("Could not create bootstrap file %s: ERR=%s\n"),
1213 jcr->RestoreBootstrap, be.bstrerror());
1216 Dmsg0(150, "=== Bootstrap file ===\n");
/* Append every received message to the file until recv() returns a negative value */
1217 while (sock->recv() >= 0) {
1218 Dmsg1(150, "%s", sock->msg);
1219 fputs(sock->msg, bs);
1222 Dmsg0(150, "=== end bootstrap file ===\n");
1223 jcr->bsr = parse_bsr(jcr, jcr->RestoreBootstrap);
1225 Jmsg(jcr, M_FATAL, 0, _("Error parsing bootstrap file.\n"));
/* Dump the parsed BSR only when debug level 150 is enabled */
1228 if (chk_dbglvl(150)) {
1229 dump_bsr(jcr->bsr, true);
1231 /* If we got a bootstrap, we are reading, so create read volume list */
1232 create_restore_volume_list(jcr);
/* Remove the on-disk file and clear the reference to its name in the JCR */
1236 unlink(jcr->RestoreBootstrap);
1237 free_pool_memory(jcr->RestoreBootstrap);
1238 jcr->RestoreBootstrap = NULL;
1240 sock->fsend(ERROR_bootstrap);
1243 return sock->fsend(OK_bootstrap);
/*
 * bootstrap_cmd: thin wrapper that runs get_bootstrap_file() on the
 * Director socket (jcr->dir_bsock) and returns its result unchanged.
 */
1246 static bool bootstrap_cmd(JCR *jcr)
1248 return get_bootstrap_file(jcr, jcr->dir_bsock);
1252 * Autochanger command from Director
/*
 * changer_cmd: handler for the autochanger listall, list, slots and
 * drives requests sent by the Director.
 *
 *  jcr  job control record; the Director socket is jcr->dir_bsock.
 *
 * Visible behavior: the request in dir->msg is matched against the four
 * accepted forms, find_device() is called with drive -1, and then either
 * autochanger_cmd() is run or an error/busy line is sent. The last
 * visible statement signals BNET_EOD on the Director socket.
 *
 * NOTE(review): lines are missing from this listing (gaps in the embedded
 * numbering). The assignments to cmd, the declaration of ok, closing
 * braces and the return statement are not visible here.
 */
1254 static bool changer_cmd(JCR *jcr)
1257 BSOCK *dir = jcr->dir_bsock;
1260 const char *cmd = NULL; /* later passed to autochanger_cmd(); set on lines not shown here */
1263 * A safe_cmd may call autochanger script but does not load/unload
1264 * slots so it can be done at the same time that the drive is open.
1266 bool safe_cmd = false;
/* Each accepted form takes one device name; every visible match marks the command as safe */
1268 if (sscanf(dir->msg, "autochanger listall %127s", devname.c_str()) == 1) {
1270 safe_cmd = ok = true;
1271 } else if (sscanf(dir->msg, "autochanger list %127s", devname.c_str()) == 1) {
1273 safe_cmd = ok = true;
1274 } else if (sscanf(dir->msg, "autochanger slots %127s", devname.c_str()) == 1) {
1276 safe_cmd = ok = true;
1277 } else if (sscanf(dir->msg, "autochanger drives %127s", devname.c_str()) == 1) {
1279 safe_cmd = ok = true;
1282 dcr = find_device(jcr, devname, NULL, -1);
1285 dev->Lock(); /* Use P to avoid indefinite block */
1286 if (!dev->device->changer_res) { /* device has no changer resource */
1287 dir->fsend(_("3998 Device \"%s\" is not an autochanger.\n"),
1289 /* Under certain "safe" conditions, we can steal the lock */
1290 } else if (safe_cmd || !dev->is_open() || dev->can_steal_lock()) {
1291 autochanger_cmd(dcr, dir, cmd);
1292 } else if (dev->is_busy() || dev->is_blocked()) {
1293 send_dir_busy_message(dir, dev);
1294 } else { /* device not being used */
1295 autochanger_cmd(dcr, dir, cmd);
/* Reply for a device that was not found; the guarding condition is on an omitted line */
1300 dir->fsend(_("3999 Device \"%s\" not found or could not be opened.\n"), devname.c_str());
1302 } else { /* error on scanf */
1303 pm_strcpy(jcr->errmsg, dir->msg); /* copy the request before fsend() reuses dir->msg */
1304 dir->fsend(_("3908 Error scanning autochanger drives/list/slots command: %s\n"),
1307 dir->signal(BNET_EOD);
1312 * Read and return the Volume label
/*
 * readlabel_cmd: handler for the readlabel request sent by the Director.
 *
 *  jcr  job control record; the Director socket is jcr->dir_bsock.
 *
 * Visible behavior: the request in dir->msg is scanned for a device name,
 * a Slot and a drive number; find_device() is called for the device and
 * drive; read_volume_label() is then run unless the device is busy or
 * blocked and its lock cannot be stolen, in which case a busy line is
 * sent. The last visible statement signals BNET_EOD.
 *
 * NOTE(review): lines are missing from this listing (gaps in the embedded
 * numbering). Some declarations, closing braces and the return statement
 * are not visible here.
 */
1314 static bool readlabel_cmd(JCR *jcr)
1317 BSOCK *dir = jcr->dir_bsock;
1320 int32_t Slot, drive;
/* All three fields must scan (result == 3) */
1322 if (sscanf(dir->msg, "readlabel %127s Slot=%d drive=%d", devname.c_str(),
1323 &Slot, &drive) == 3) {
1324 dcr = find_device(jcr, devname, NULL, drive);
1327 dev->Lock(); /* Use P to avoid indefinite block */
1328 if (!dev->is_open()) { /* closed device: read the label directly */
1329 read_volume_label(jcr, dcr, dev, Slot);
1331 /* Under certain "safe" conditions, we can steal the lock */
1332 } else if (dev->can_steal_lock()) {
1333 read_volume_label(jcr, dcr, dev, Slot);
1334 } else if (dev->is_busy() || dev->is_blocked()) {
1335 send_dir_busy_message(dir, dev);
1336 } else { /* device not being used */
1337 read_volume_label(jcr, dcr, dev, Slot);
/* Reply for a device that was not found; the guarding condition is on an omitted line */
1342 dir->fsend(_("3999 Device \"%s\" not found or could not be opened.\n"), devname.c_str());
1345 pm_strcpy(jcr->errmsg, dir->msg); /* copy the request before fsend() reuses dir->msg */
1346 dir->fsend(_("3909 Error scanning readlabel command: %s\n"), jcr->errmsg);
1348 dir->signal(BNET_EOD);
1354 * Read the tape label
1356 * Enter with the mutex set
/*
 * read_volume_label: read the label of the Volume in dev and report the
 * outcome to the Director.
 *
 *  jcr   job control record; replies go to jcr->dir_bsock.
 *  dcr   device control record passed to the autoload and label readers.
 *  dev   device whose lock is stolen for the duration of the read.
 *  Slot  slot passed to try_autoload_device() and echoed in the reply.
 *
 * The device lock is taken over with steal_device_lock() in state
 * BST_WRITING_LABEL and handed back with give_back_device_lock() in the
 * last visible statement.
 *
 * NOTE(review): lines are missing from this listing (gaps in the embedded
 * numbering). The case labels of the switch, the bail_out label and the
 * declaration of hold are not visible here.
 */
1358 static void read_volume_label(JCR *jcr, DCR *dcr, DEVICE *dev, int Slot)
1360 BSOCK *dir = jcr->dir_bsock;
1364 steal_device_lock(dev, &hold, BST_WRITING_LABEL);
/* An empty Volume name is passed to the autoload helper */
1366 if (!try_autoload_device(jcr, dcr, Slot, "")) {
1367 goto bail_out; /* error */
1370 dev->clear_labeled(); /* force read of label */
1371 switch (read_dev_volume_label(dcr)) {
1373 /* DO NOT add quotes around the Volume name. It is scanned in the DIR */
1374 dir->fsend(_("3001 Volume=%s Slot=%d\n"), dev->VolHdr.VolumeName, Slot);
1375 Dmsg1(100, "Volume: %s\n", dev->VolHdr.VolumeName);
/* Failure reply carries the text left in jcr->errmsg */
1378 dir->fsend(_("3902 Cannot mount Volume on Storage Device \"%s\" because:\n%s"),
1379 dev->print_name(), jcr->errmsg);
1384 give_back_device_lock(dev, &hold);
/*
 * try_autoload_device: record the requested Volume name and slot in dcr,
 * then ask autoload_device() to load it.
 *
 *  jcr      job control record; its Director socket is passed on.
 *  dcr      device control record that receives VolumeName and slot data.
 *  slot     slot number; InChanger is set only when slot > 0.
 *  VolName  Volume name copied into dcr->VolumeName (bounded copy).
 *
 * NOTE(review): the return statements are on lines missing from this
 * listing; presumably a negative autoload_device() result yields false
 * and anything else true -- confirm against the full source.
 */
1388 static bool try_autoload_device(JCR *jcr, DCR *dcr, int slot, const char *VolName)
1390 BSOCK *dir = jcr->dir_bsock;
1392 bstrncpy(dcr->VolumeName, VolName, sizeof(dcr->VolumeName));
1393 dcr->VolCatInfo.Slot = slot;
1394 dcr->VolCatInfo.InChanger = slot > 0;
1395 if (autoload_device(dcr, 0, dir) < 0) { /* autoload if possible */
/*
 * send_dir_busy_message: tell the Director why dev cannot be used now.
 *
 *  dir  Director socket the status line is sent on.
 *  dev  device whose blocked/busy state is reported.
 *
 * A blocked device gets one of the 3931-3935 lines chosen by
 * dev->blocked(); otherwise 3936 is sent when dev->can_read() is true,
 * and 3937 with the writer and reservation counts in the remaining case.
 *
 * NOTE(review): lines are missing from this listing (gaps in the embedded
 * numbering). The first case label, the default label, the break
 * statements, the print_name() continuation arguments and the closing
 * braces are not visible here. Reply code 3935 is used for two different
 * messages, and the 3936 call ends with a doubled semicolon.
 */
1401 static void send_dir_busy_message(BSOCK *dir, DEVICE *dev)
1403 if (dev->is_blocked()) {
1404 switch (dev->blocked()) {
1406 dir->fsend(_("3931 Device \"%s\" is BLOCKED. user unmounted.\n"),
1409 case BST_UNMOUNTED_WAITING_FOR_SYSOP:
1410 dir->fsend(_("3932 Device \"%s\" is BLOCKED. user unmounted during wait for media/mount.\n"),
1413 case BST_WAITING_FOR_SYSOP:
1414 dir->fsend(_("3933 Device \"%s\" is BLOCKED waiting for media.\n"),
1417 case BST_DOING_ACQUIRE:
1418 dir->fsend(_("3934 Device \"%s\" is being initialized.\n"),
1421 case BST_WRITING_LABEL:
1422 dir->fsend(_("3935 Device \"%s\" is blocked labeling a Volume.\n"),
1426 dir->fsend(_("3935 Device \"%s\" is blocked for unknown reason.\n"),
1430 } else if (dev->can_read()) { /* not blocked, device is in read mode */
1431 dir->fsend(_("3936 Device \"%s\" is busy reading.\n"),
1432 dev->print_name());;
1434 dir->fsend(_("3937 Device \"%s\" is busy with writers=%d reserved=%d.\n"),
1435 dev->print_name(), dev->num_writers, dev->num_reserved());