2 Bacula(R) - The Network Backup Solution
4 Copyright (C) 2000-2016 Kern Sibbald
6 The original author of Bacula is Kern Sibbald, with contributions
7 from many others, a complete list can be found in the file AUTHORS.
9 You may use this file and others of this release according to the
10 license defined in the LICENSE file, which includes the Affero General
11 Public License, v3.0 ("AGPLv3") and some additional permissions and
12 terms pursuant to its AGPLv3 Section 7.
14 This notice must be preserved when any source code is
15 conveyed and/or propagated.
17 Bacula(R) is a registered trademark of Kern Sibbald.
21 * tape_dev.c -- low level operations on tape devices
23 * written by, Kern Sibbald, MM
24 * separated from dev.c in February 2014
26 * The separation between tape and file is not yet clean.
31 * Handling I/O errors and end of tape conditions are a bit tricky.
32 * This is how it is currently done when writing.
33 * On either an I/O error or end of tape,
34 * we will stop writing on the physical device (no I/O recovery is
35 * attempted at least in this daemon). The state flag will be set
36 * to include ST_EOT, which is ephemeral, and ST_WEOT, which is
37 * persistent. Lots of routines clear ST_EOT, but ST_WEOT is
38 * cleared only when the problem goes away. Now when ST_WEOT
39 * is set all calls to write_block_to_device() call the fix_up
40 * routine. In addition, all threads are blocked
41 * from writing on the tape by calling lock_dev(), and threads other
42 * than the first thread to hit the EOT will block on a condition
43 * variable. The first thread to hit the EOT will continue to
44 * be able to read and write the tape (he sort of tunnels through
45 * the locking mechanism -- see lock_dev() for details).
47 * Now presumably somewhere higher in the chain of command
48 * (device.c), someone will notice the EOT condition and
49 * get a new tape up, get the tape label read, and mark
50 * the label for rewriting. Then this higher level routine
51 * will write the unwritten buffer to the new volume.
52 * Finally, he will release
53 * any blocked threads by doing a broadcast on the condition
54 * variable. At that point, we should be totally back in
55 * business with no lost data.
65 /* Imported functions */
66 extern void set_os_device_parameters(DCR *dcr);
67 extern bool dev_get_os_pos(DEVICE *dev, struct mtget *mt_stat);
68 extern uint32_t status_dev(DEVICE *dev);
69 const char *mode_to_str(int mode);
/*
 * Open the tape device described by this DEVICE object.
 *
 * From the visible statements: calls mount(1), queries the autochanger
 * slot, arms a thread timer when the device is a FIFO and a timeout is
 * set, then opens dev_name with d_open().  One path opens with
 * O_NONBLOCK and issues an MTREW ioctl to check that a medium is
 * loaded; it stops retrying when the rewind fails with anything other
 * than EBUSY, or once max_open_wait seconds have elapsed since
 * start_time.  After a good rewind the device is opened again in
 * normal (blocking) mode and set_os_device_parameters() is applied.
 * On failure errmsg is formatted and copied to dcr->jcr->errmsg.
 *
 * NOTE(review): the HAVE_WIN32 conditionals suggest the plain d_open()
 * is the Windows path and the non-blocking/rewind loop is the other
 * one; the matching #else/#endif lines are not visible here -- confirm.
 */
73 void DEVICE::open_tape_device(DCR *dcr, int omode)
76 int timeout = max_open_wait;
77 #if !defined(HAVE_WIN32)
79 utime_t start_time = time(NULL);
82 mount(1); /* do mount if required */
84 Dmsg0(100, "Open dev: device is tape\n");
86 get_autochanger_loaded_slot(dcr);
/* Only FIFOs get a watchdog timer around the open */
95 if (is_fifo() && timeout) {
97 tid = start_thread_timer(dcr->jcr, pthread_self(), timeout);
99 Dmsg2(100, "Try open %s mode=%s\n", print_name(), mode_to_str(omode));
100 #if defined(HAVE_WIN32)
103 if ((m_fd = d_open(dev_name, mode)) < 0) {
110 /* If busy retry each second for max_open_wait seconds */
112 /* Try non-blocking open */
113 m_fd = d_open(dev_name, mode+O_NONBLOCK);
117 Dmsg5(100, "Open error on %s omode=%d mode=%x errno=%d: ERR=%s\n",
118 print_name(), omode, mode, errno, be.bstrerror());
120 /* Tape open, now rewind it */
121 Dmsg0(100, "Rewind after open\n");
122 mt_com.mt_op = MTREW;
124 /* rewind only if dev is a tape */
125 if (is_tape() && (d_ioctl(m_fd, MTIOCTOP, (char *)&mt_com) < 0)) {
127 dev_errno = errno; /* set error status from rewind */
130 Dmsg2(100, "Rewind error on %s close: ERR=%s\n", print_name(),
131 be.bstrerror(dev_errno));
132 /* If we get busy, device is probably rewinding, try again */
133 if (dev_errno != EBUSY) {
134 break; /* error -- no medium */
137 /* Got fd and rewind worked, so we must have medium in drive */
139 m_fd = d_open(dev_name, mode); /* open normally */
143 Dmsg5(100, "Open error on %s omode=%d mode=%x errno=%d: ERR=%s\n",
144 print_name(), omode, mode, errno, be.bstrerror());
149 set_os_device_parameters(dcr); /* do system dependent stuff */
150 break; /* Successfully opened and rewound */
154 /* Exceed wait time ? */
155 if (time(NULL) - start_time >= max_open_wait) {
156 break; /* yes, get out */
163 Mmsg2(errmsg, _("Unable to open device %s: ERR=%s\n"),
164 print_name(), be.bstrerror(dev_errno));
/* Copy the device error text into the job's message buffer */
166 pm_strcpy(dcr->jcr->errmsg, errmsg);
168 Dmsg1(100, "%s", errmsg);
171 /* Stop any open() timer we started */
173 stop_thread_timer(tid);
176 Dmsg1(100, "open dev: tape %d opened\n", m_fd);
182 * Returns: true on success
/*
 * Rewind the tape.
 *
 * Clears the ST_EOT, ST_EOF and ST_WEOT state bits and zeroes
 * block_num and file, then issues MTREW through d_ioctl() inside a
 * loop whose counter starts at max_rewind_wait and drops by 5 per
 * pass.  The visible error handling logs a "retrying" message while
 * the counter still equals max_rewind_wait, can re-open the device
 * with the saved openmode (the condition guarding that re-open is not
 * visible here), reports "No tape loaded or drive offline" for EIO,
 * and sleeps before retrying while the error is EIO and the counter
 * is still positive.  Other failures produce the "Rewind error"
 * message.
 */
185 bool tape_dev::rewind(DCR *dcr)
191 Dmsg3(400, "rewind res=%d fd=%d %s\n", num_reserved(), m_fd, print_name());
192 state &= ~(ST_EOT|ST_EOF|ST_WEOT); /* remove EOF/EOT flags */
193 block_num = file = 0;
200 mt_com.mt_op = MTREW;
202 /* If we get an I/O error on rewind, it is probably because
203 * the drive is actually busy. We loop for (about 5 minutes)
204 * retrying every 5 seconds.
206 for (i=max_rewind_wait; ; i -= 5) {
207 if (d_ioctl(m_fd, MTIOCTOP, (char *)&mt_com) < 0) {
210 if (i == max_rewind_wait) {
211 Dmsg1(200, "Rewind error, %s. retrying ...\n", be.bstrerror());
214 * This is a gross hack, because if the user has the
215 * device mounted (i.e. open), then uses mtx to load
216 * a tape, the current open file descriptor is invalid.
217 * So, we close the drive and re-open it.
220 int open_mode = openmode;
223 open(dcr, open_mode);
231 if (dev_errno == EIO) {
232 Mmsg1(errmsg, _("No tape loaded or drive offline on %s.\n"), print_name());
236 if (dev_errno == EIO && i > 0) {
237 Dmsg0(200, "Sleeping 5 seconds.\n");
242 Mmsg2(errmsg, _("Rewind error on %s. ERR=%s.\n"),
243 print_name(), be.bstrerror());
253 * Position device to end of medium (end of data)
254 * Returns: true on success
/*
 * Position the device at end of data.
 *
 * Visible flow: report "Bad call to eod" when the device is not open;
 * on __digital__ && __unix__ builds return fsf(VolCatInfo.VolCatFiles);
 * otherwise clear EOF, zero block_num and file, and then use one of:
 *   - lseek() to SEEK_END (the condition selecting this path is not
 *     visible in this excerpt);
 *   - with CAP_MTIOCGET and either CAP_FASTFSF or CAP_EOM, a single
 *     MTIOCTOP ioctl (MTFSF with count INT16_MAX, or MTEOM when
 *     CAP_EOM is set) followed by get_os_tape_file() to read back the
 *     file number;
 *   - rewind, then step forward with fsf until at_eot(), with a guard
 *     against a drive that does not advance.
 * When CAP_BSFATEOM is set the file number is corrected after backing
 * up over the EOF mark.  update_pos(dcr) is called near the end.
 */
257 bool DEVICE::eod(DCR *dcr)
267 Mmsg1(errmsg, _("Bad call to eod. Device %s not open\n"), print_name());
268 Dmsg1(100, "%s", errmsg);
272 #if defined (__digital__) && defined (__unix__)
273 return fsf(VolCatInfo.VolCatFiles);
280 clear_eof(); /* remove EOF flag */
281 block_num = file = 0;
289 pos = lseek(dcr, (boffset_t)0, SEEK_END);
290 Dmsg1(200, "====== Seek to %lld\n", pos);
299 Mmsg2(errmsg, _("lseek error on %s. ERR=%s.\n"),
300 print_name(), be.bstrerror());
301 Dmsg1(100, "%s", errmsg);
306 if (has_cap(CAP_FASTFSF) && !has_cap(CAP_EOM)) {
307 Dmsg0(100,"Using FAST FSF for EOM\n");
308 /* If unknown position, rewind */
309 if (get_os_tape_file() < 0) {
311 Dmsg0(100, "Rewind error\n");
316 mt_com.mt_op = MTFSF;
318 * ***FIXME*** fix code to handle case that INT16_MAX is
321 mt_com.mt_count = INT16_MAX; /* use big positive number */
/* NOTE(review): fires only if mt_count cannot hold INT16_MAX as a positive value -- confirm still needed */
322 if (mt_com.mt_count < 0) {
323 mt_com.mt_count = INT16_MAX; /* brain damaged system */
327 if (has_cap(CAP_MTIOCGET) && (has_cap(CAP_FASTFSF) || has_cap(CAP_EOM))) {
328 if (has_cap(CAP_EOM)) {
329 Dmsg0(100,"Using EOM for EOM\n");
330 mt_com.mt_op = MTEOM;
334 if (d_ioctl(m_fd, MTIOCTOP, (char *)&mt_com) < 0) {
336 clrerror(mt_com.mt_op);
337 Dmsg1(50, "ioctl error: %s\n", be.bstrerror());
339 Mmsg2(errmsg, _("ioctl MTEOM error on %s. ERR=%s.\n"),
340 print_name(), be.bstrerror());
341 Dmsg1(100, "%s", errmsg);
/* Ask the OS which file the drive ended up in */
346 os_file = get_os_tape_file();
350 Mmsg2(errmsg, _("ioctl MTIOCGET error on %s. ERR=%s.\n"),
351 print_name(), be.bstrerror());
352 Dmsg1(100, "%s", errmsg);
356 Dmsg1(100, "EOD file=%d\n", os_file);
364 * Rewind then use FSF until EOT reached
367 Dmsg0(100, "Rewind error.\n");
372 * Move file by file to the end of the tape
375 for (file_num=file; !at_eot(); file_num++) {
376 Dmsg0(200, "eod: doing fsf 1\n");
378 Dmsg0(100, "fsf error.\n");
383 * Avoid infinite loop by ensuring we advance.
385 if (!at_eot() && file_num == (int)file) {
386 Dmsg1(100, "fsf did not advance from file %d\n", file_num);
388 os_file = get_os_tape_file();
390 Dmsg2(100, "Adjust file from %d to %d\n", file_num, os_file);
398 * Some drivers leave us after second EOF when doing
399 * MTEOM, so we must backup so that appending overwrites
402 if (has_cap(CAP_BSFATEOM)) {
403 /* Backup over EOF */
405 /* If BSF worked and fileno is known (not -1), set file */
406 os_file = get_os_tape_file();
408 Dmsg2(100, "BSFATEOF adjust file from %d to %d\n", file , os_file);
411 file++; /* wing it -- not correct on all OSes */
414 update_pos(dcr); /* update position */
416 Dmsg1(200, "EOD dev->file=%d\n", file);
422 * Load medium in device
423 * Returns: true on success
/*
 * Ask the drive to load its medium via the MTLOAD tape operation.
 *
 * Visible behaviour: a device that is not open gets dev_errno = EBADF
 * and a fatal "Bad call to load_dev" message; non-tape devices are
 * tested for with is_tape() (the result of that branch is not visible
 * here).  Where MTLOAD is unavailable, dev_errno is set to ENOTTY and
 * an error message is built.  Otherwise block_num and file are zeroed
 * and MTLOAD is issued through d_ioctl(); on failure errno is stored
 * in dev_errno and the "ioctl MTLOAD error" message is built.
 */
426 bool load_dev(DEVICE *dev)
433 dev->dev_errno = EBADF;
434 Mmsg0(dev->errmsg, _("Bad call to load_dev. Device not open\n"));
435 Emsg0(M_FATAL, 0, dev->errmsg);
438 if (!(dev->is_tape())) {
442 Dmsg0(200, "stored: MTLOAD command not available\n");
444 dev->dev_errno = ENOTTY; /* function not available */
445 Mmsg2(dev->errmsg, _("ioctl MTLOAD error on %s. ERR=%s.\n"),
446 dev->print_name(), be.bstrerror());
/* Reset the cached position before issuing the load */
450 dev->block_num = dev->file = 0;
453 mt_com.mt_op = MTLOAD;
455 if (dev->d_ioctl(dev->fd(), MTIOCTOP, (char *)&mt_com) < 0) {
457 dev->dev_errno = errno;
458 Mmsg2(dev->errmsg, _("ioctl MTLOAD error on %s. ERR=%s.\n"),
459 dev->print_name(), be.bstrerror());
467 * Rewind device and put it offline
468 * Returns: true on success
/*
 * Put the tape drive offline with the MTOFFL tape operation.
 *
 * Returns true immediately when the device is not open.  Otherwise
 * clears the ST_APPEND, ST_READ, ST_EOT, ST_EOF and ST_WEOT state
 * bits, zeroes block_num and file, and issues MTOFFL through
 * d_ioctl(); a failing ioctl builds the "ioctl MTOFFL error" message.
 */
471 bool tape_dev::offline()
476 return true; /* device not open */
479 state &= ~(ST_APPEND|ST_READ|ST_EOT|ST_EOF|ST_WEOT); /* remove EOF/EOT flags */
480 block_num = file = 0;
484 mt_com.mt_op = MTOFFL;
486 if (d_ioctl(m_fd, MTIOCTOP, (char *)&mt_com) < 0) {
489 Mmsg2(errmsg, _("ioctl MTOFFL error on %s. ERR=%s.\n"),
490 print_name(), be.bstrerror());
493 Dmsg1(100, "Offlined device %s\n", print_name());
/*
 * Choose between taking the drive offline and rewinding it, based on
 * the CAP_OFFLINEUNMOUNT capability.
 *
 * NOTE(review): the statements inside both branches are not visible
 * in this excerpt; presumably offline() when the capability is set
 * and rewind otherwise -- confirm against the full source.
 */
497 bool DEVICE::offline_or_rewind()
502 if (has_cap(CAP_OFFLINEUNMOUNT)) {
506 * Note, this rewind probably should not be here (it wasn't
507 * in prior versions of Bacula), but on FreeBSD, this is
508 * needed in the case the tape was "frozen" due to an error
509 * such as backspacing after writing an EOF. If it is not
510 * done, all future references to the drive get an I/O error.
518 * Forward space a file
519 * Returns: true on success
/*
 * Forward space num files.
 *
 * A device that is not open gets a fatal "Bad call to fsf" message,
 * and a "Device %s at End of Tape" error exists for the entry check
 * (its exact condition is not visible here).  Three strategies are
 * visible, chosen by capability:
 *   1. CAP_FSF + CAP_MTIOCGET + CAP_FASTFSF: one MTFSF ioctl with
 *      mt_count = num, then get_os_tape_file() to learn the new file
 *      number.  errno is saved in my_errno so the later error message
 *      reports the original failure.
 *   2. CAP_FSF only: for each file, read one record into a scratch
 *      buffer (DEFAULT_BLOCK_SIZE when max_block_size is 0) before
 *      issuing MTFSF, so two consecutive EOF marks can be recognised
 *      as end of data.
 *   3. No CAP_FSF: call fsr(INT32_MAX) once per file to simulate FSF.
 */
522 bool tape_dev::fsf(int num)
530 Mmsg0(errmsg, _("Bad call to fsf. Device not open\n"));
531 Emsg0(M_FATAL, 0, errmsg);
541 Mmsg1(errmsg, _("Device %s at End of Tape.\n"), print_name());
545 Dmsg0(200, "ST_EOF set on entry to FSF\n");
551 * If Fast forward space file is set, then we
552 * use MTFSF to forward space and MTIOCGET
553 * to get the file position. We assume that
554 * the SCSI driver will ensure that we do not
555 * forward space past the end of the medium.
557 if (has_cap(CAP_FSF) && has_cap(CAP_MTIOCGET) && has_cap(CAP_FASTFSF)) {
559 mt_com.mt_op = MTFSF;
560 mt_com.mt_count = num;
561 stat = d_ioctl(m_fd, MTIOCTOP, (char *)&mt_com);
563 my_errno = errno; /* save errno */
564 } else if ((os_file=get_os_tape_file()) < 0) {
565 my_errno = errno; /* save errno */
570 Dmsg0(200, "Set ST_EOT\n");
572 Mmsg2(errmsg, _("ioctl MTFSF error on %s. ERR=%s.\n"),
573 print_name(), be.bstrerror(my_errno));
574 Dmsg1(200, "%s", errmsg);
578 Dmsg1(200, "fsf file=%d\n", os_file);
584 * Here if CAP_FSF is set, and virtually all drives
585 * these days support it, we read a record, then forward
586 * space one file. Using this procedure, which is slow,
587 * is the only way we can be sure that we don't read
588 * two consecutive EOF marks, which means End of Data.
590 } else if (has_cap(CAP_FSF)) {
593 Dmsg0(200, "FSF has cap_fsf\n");
/* Size the scratch read buffer; fall back to the default when no maximum is configured */
594 if (max_block_size == 0) {
595 rbuf_len = DEFAULT_BLOCK_SIZE;
597 rbuf_len = max_block_size;
599 rbuf = get_memory(rbuf_len);
600 mt_com.mt_op = MTFSF;
602 while (num-- && !at_eot()) {
603 Dmsg0(100, "Doing read before fsf\n");
604 if ((stat = this->read((char *)rbuf, rbuf_len)) < 0) {
605 if (errno == ENOMEM) { /* tape record exceeds buf len */
606 stat = rbuf_len; /* This is OK */
608 * On IBM drives, they return ENOSPC at EOM
609 * instead of EOF status
611 } else if (at_eof() && errno == ENOSPC) {
617 Dmsg2(100, "Set ST_EOT read errno=%d. ERR=%s\n", dev_errno,
619 Mmsg2(errmsg, _("read error on %s. ERR=%s.\n"),
620 print_name(), be.bstrerror());
621 Dmsg1(100, "%s", errmsg);
625 if (stat == 0) { /* EOF */
626 Dmsg1(100, "End of File mark from read. File=%d\n", file+1);
627 /* Two reads of zero means end of tape */
630 Dmsg0(100, "Set ST_EOT\n");
636 } else { /* Got data */
641 Dmsg0(100, "Doing MTFSF\n");
642 stat = d_ioctl(m_fd, MTIOCTOP, (char *)&mt_com);
643 if (stat < 0) { /* error => EOT */
646 Dmsg0(100, "Set ST_EOT\n");
648 Mmsg2(errmsg, _("ioctl MTFSF error on %s. ERR=%s.\n"),
649 print_name(), be.bstrerror());
650 Dmsg0(100, "Got < 0 for MTFSF\n");
651 Dmsg1(100, "%s", errmsg);
659 * No FSF, so use FSR to simulate it
662 Dmsg0(200, "Doing FSR for FSF\n");
663 while (num-- && !at_eot()) {
664 fsr(INT32_MAX); /* returns -1 on EOF or EOT */
668 Mmsg1(errmsg, _("Device %s at End of Tape.\n"), print_name());
674 Dmsg1(200, "Return %d from FSF\n", stat);
676 Dmsg0(200, "ST_EOF set on exit FSF\n");
679 Dmsg0(200, "ST_EOT set on exit FSF\n");
681 Dmsg1(200, "Return from FSF file=%d\n", file);
686 * Backward space a file
687 * Returns: false on failure
/*
 * Backward space num files using the MTBSF tape operation.
 *
 * Visible behaviour: a device that is not open gets a fatal "Bad call
 * to bsf" message, and a "cannot BSF because it is not a tape" error
 * exists for non-tape devices.  Otherwise MTBSF is issued with
 * mt_count = num through d_ioctl(), and the "ioctl MTBSF error"
 * message is built on the failure path.
 */
690 bool tape_dev::bsf(int num)
697 Mmsg0(errmsg, _("Bad call to bsf. Device not open\n"));
698 Emsg0(M_FATAL, 0, errmsg);
703 Mmsg1(errmsg, _("Device %s cannot BSF because it is not a tape.\n"),
714 mt_com.mt_op = MTBSF;
715 mt_com.mt_count = num;
716 stat = d_ioctl(m_fd, MTIOCTOP, (char *)&mt_com);
720 Mmsg2(errmsg, _("ioctl MTBSF error on %s. ERR=%s.\n"),
721 print_name(), be.bstrerror());
728 * Forward space num records
729 * Returns: false on failure
/*
 * Forward space num records using the MTFSR tape operation.
 *
 * Visible behaviour: a device that is not open gets a fatal "Bad call
 * to fsr" message, and a device without CAP_FSR gets "ioctl MTFSR not
 * permitted".  Otherwise MTFSR is issued with mt_count = num.  On the
 * failure path the OS position is queried with dev_get_os_pos() and,
 * when that succeeds, file and block_num are overwritten with the
 * reported mt_fileno / mt_blkno before the error message is built.
 */
732 bool DEVICE::fsr(int num)
739 Mmsg0(errmsg, _("Bad call to fsr. Device not open\n"));
740 Emsg0(M_FATAL, 0, errmsg);
748 if (!has_cap(CAP_FSR)) {
749 Mmsg1(errmsg, _("ioctl MTFSR not permitted on %s.\n"), print_name());
753 Dmsg1(100, "fsr %d\n", num);
754 mt_com.mt_op = MTFSR;
755 mt_com.mt_count = num;
756 stat = d_ioctl(m_fd, MTIOCTOP, (char *)&mt_com);
762 struct mtget mt_stat;
/* NOTE(review): the debug text below says "FSF" although this is the MTFSR path -- confirm intent */
764 Dmsg1(100, "FSF fail: ERR=%s\n", be.bstrerror());
/* Resynchronise the cached position with what the driver reports */
765 if (dev_get_os_pos(this, &mt_stat)) {
766 Dmsg4(100, "Adjust from %d:%d to %d:%d\n", file,
767 block_num, mt_stat.mt_fileno, mt_stat.mt_blkno);
768 file = mt_stat.mt_fileno;
769 block_num = mt_stat.mt_blkno;
777 Mmsg3(errmsg, _("ioctl MTFSR %d error on %s. ERR=%s.\n"),
778 num, print_name(), be.bstrerror());
784 * Backward space a record
785 * Returns: false on failure
/*
 * Backward space num records using the MTBSR tape operation.
 *
 * Visible behaviour: a device that is not open gets a fatal "Bad call
 * to bsr_dev" message, and a device without CAP_BSR gets "ioctl MTBSR
 * not permitted".  Otherwise MTBSR is issued with mt_count = num
 * through d_ioctl(), and the "ioctl MTBSR error" message is built on
 * the failure path.
 */
788 bool DEVICE::bsr(int num)
795 Mmsg0(errmsg, _("Bad call to bsr_dev. Device not open\n"));
796 Emsg0(M_FATAL, 0, errmsg);
804 if (!has_cap(CAP_BSR)) {
805 Mmsg1(errmsg, _("ioctl MTBSR not permitted on %s.\n"), print_name());
809 Dmsg0(100, "bsr_dev\n");
813 mt_com.mt_op = MTBSR;
814 mt_com.mt_count = num;
815 stat = d_ioctl(m_fd, MTIOCTOP, (char *)&mt_com);
819 Mmsg2(errmsg, _("ioctl MTBSR error on %s. ERR=%s.\n"),
820 print_name(), be.bstrerror());
/*
 * Issue the MTLOCK tape operation on the open descriptor.
 * Does nothing for non-tape devices.  The ioctl result is not checked.
 */
825 void tape_dev::lock_door()
829 if (!is_tape()) return;
830 mt_com.mt_op = MTLOCK;
832 d_ioctl(m_fd, MTIOCTOP, (char *)&mt_com);
/*
 * Issue the MTUNLOCK tape operation on the open descriptor.
 * Does nothing for non-tape devices.  The ioctl result is not checked.
 */
836 void tape_dev::unlock_door()
840 if (!is_tape()) return;
841 mt_com.mt_op = MTUNLOCK;
843 d_ioctl(m_fd, MTIOCTOP, (char *)&mt_com);
848 * Reposition the device to file, block
849 * Returns: false on failure
/*
 * Move the tape to file rfile, block rblock.
 *
 * Visible behaviour: a device that is not open gets a fatal "Bad call
 * to reposition" message.  A rewind step exists (its condition is not
 * visible here), and fsf(rfile-file) is used to move forward to the
 * wanted file.  When rblock is behind the current block_num, the
 * debug traces indicate a "bsf 1" followed by "fsf 1" to get back to
 * the start of the file.  To reach the block, drives with
 * CAP_POSITIONBLOCKS use fsr(rblock-block_num); otherwise blocks are
 * read one at a time with dcr->read_block_from_dev() until block_num
 * reaches rblock.
 */
852 bool tape_dev::reposition(DCR *dcr, uint32_t rfile, uint32_t rblock)
856 Mmsg0(errmsg, _("Bad call to reposition. Device not open\n"));
857 Emsg0(M_FATAL, 0, errmsg);
861 /* After this point, we are tape only */
862 Dmsg4(100, "reposition from %u:%u to %u:%u\n", file, block_num, rfile, rblock);
864 Dmsg0(100, "Rewind\n");
870 Dmsg1(100, "fsf %d\n", rfile-file);
871 if (!fsf(rfile-file)) {
872 Dmsg1(100, "fsf failed! ERR=%s\n", bstrerror());
875 Dmsg2(100, "wanted_file=%d at_file=%d\n", rfile, file);
877 if (rblock < block_num) {
878 Dmsg2(100, "wanted_blk=%d at_blk=%d\n", rblock, block_num);
879 Dmsg0(100, "bsf 1\n");
881 Dmsg0(100, "fsf 1\n");
883 Dmsg2(100, "wanted_blk=%d at_blk=%d\n", rblock, block_num);
/* NOTE(review): the comment below says errors are ignored, yet the visible statement returns fsr()'s result -- confirm */
885 if (has_cap(CAP_POSITIONBLOCKS) && rblock > block_num) {
886 /* Ignore errors as Bacula can read to the correct block */
887 Dmsg1(100, "fsr %d\n", rblock-block_num);
888 return fsr(rblock-block_num);
/* Fallback: advance by reading blocks until the wanted one is reached */
890 while (rblock > block_num) {
891 if (!dcr->read_block_from_dev(NO_BLOCK_NUMBER_CHECK)) {
894 Dmsg2(30, "Failed to find requested block on %s: ERR=%s",
895 print_name(), be.bstrerror());
898 Dmsg2(300, "moving forward wanted_blk=%d at_blk=%d\n", rblock, block_num);
905 * Write an end of file on the device
906 * Returns: true on success
/*
 * Write num end-of-file marks using the MTWEOF tape operation.
 *
 * Visible behaviour: a device that is not open gets a fatal "Bad call
 * to weof_dev" message, and a fatal "Attempt to WEOF on
 * non-appendable Volume" message exists for volumes that cannot be
 * appended to (the test itself is not visible here).  Otherwise
 * MTWEOF is issued with mt_count = num through d_ioctl(), and the
 * "ioctl MTWEOF error" message is built on the failure path.
 */
909 bool DEVICE::weof(int num)
913 Dmsg1(129, "=== weof_dev=%s\n", print_name());
917 Mmsg0(errmsg, _("Bad call to weof_dev. Device not open\n"));
918 Emsg0(M_FATAL, 0, errmsg);
927 Mmsg0(errmsg, _("Attempt to WEOF on non-appendable Volume\n"));
928 Emsg0(M_FATAL, 0, errmsg);
934 mt_com.mt_op = MTWEOF;
935 mt_com.mt_count = num;
936 stat = d_ioctl(m_fd, MTIOCTOP, (char *)&mt_com);
945 Mmsg2(errmsg, _("ioctl MTWEOF error on %s. ERR=%s.\n"),
946 print_name(), be.bstrerror());
953 * If timeout, wait until the mount command returns 0.
954 * If !timeout, try to mount the device only once.
/*
 * Mount the tape device if needed.
 *
 * When the device is not yet mounted and a mount_command is
 * configured, delegates to mount_tape(1, timeout) and returns its
 * result.  The return value on the other path is not visible here.
 */
956 bool tape_dev::mount(int timeout)
958 Dmsg0(190, "Enter tape mount\n");
960 if (!is_mounted() && device->mount_command) {
961 return mount_tape(1, timeout);
968 * If timeout, wait until the unmount command returns 0.
969 * If !timeout, try to unmount the device only once.
/*
 * Unmount the tape device.
 *
 * Delegates to mount_tape(0, timeout) when the guard below holds and
 * an unmount_command is configured.  The return value on the other
 * path is not visible here.
 *
 * NOTE(review): the guard tests !is_mounted(), i.e. the unmount
 * command runs only when the device is NOT flagged as mounted.  That
 * looks inverted for an unmount -- confirm whether is_mounted() was
 * intended.
 */
971 bool tape_dev::unmount(int timeout)
973 Dmsg0(100, "Enter tape unmount\n");
975 if (!is_mounted() && requires_mount() && device->unmount_command) {
976 return mount_tape(0, timeout);
983 * (Un)mount the device (for tape devices)
/*
 * Run the configured mount or unmount command for this tape device.
 *
 * icmd is taken from device->mount_command or
 * device->unmount_command (presumably selected by the mount
 * argument; the test is not visible here).  The template is expanded
 * into ocmd by edit_mount_codes() and executed with
 * run_program_full_output(), passing max_open_wait/2 as its timeout
 * argument and collecting output in a buffer obtained with
 * get_memory(4000).  A non-zero status enters the retry loop; on the
 * failure path errmsg is set to "Device %s cannot be %smounted" and
 * the buffer is freed.  On success set_mounted(mount) records the new
 * state and the buffer is freed.
 *
 * NOTE(review): how dotimeout influences the retry loop is not
 * visible in this excerpt.
 */
985 bool tape_dev::mount_tape(int mount, int dotimeout)
987 POOL_MEM ocmd(PM_FNAME);
995 icmd = device->mount_command;
997 icmd = device->unmount_command;
1000 edit_mount_codes(ocmd, icmd);
1002 Dmsg2(100, "mount_tape: cmd=%s mounted=%d\n", ocmd.c_str(), !!is_mounted());
1005 /* Try at most 10 times to (un)mount the device. This should perhaps be configurable. */
1010 results = get_memory(4000);
1012 /* If busy retry each second */
1013 Dmsg1(100, "mount_tape run_prog=%s\n", ocmd.c_str());
1014 while ((status = run_program_full_output(ocmd.c_str(), max_open_wait/2, results)) != 0) {
1019 Dmsg5(100, "Device %s cannot be %smounted. stat=%d result=%s ERR=%s\n", print_name(),
1020 (mount ? "" : "un"), status, results, be.bstrerror(status));
1021 Mmsg(errmsg, _("Device %s cannot be %smounted. ERR=%s\n"),
1022 print_name(), (mount ? "" : "un"), be.bstrerror(status));
/* Failure exit: release the output buffer before returning */
1025 free_pool_memory(results);
1026 Dmsg0(200, "============ mount=0\n");
1031 set_mounted(mount); /* set/clear mounted flag */
1032 free_pool_memory(results);
1033 Dmsg1(200, "============ mount=%d\n", mount);