2 Bacula® - The Network Backup Solution
4 Copyright (C) 2000-2007 Free Software Foundation Europe e.V.
6 The main author of Bacula is Kern Sibbald, with contributions from
7 many others, a complete list can be found in the file AUTHORS.
8 This program is Free Software; you can redistribute it and/or
9 modify it under the terms of version two of the GNU General Public
10 License as published by the Free Software Foundation plus additions
11 that are listed in the file LICENSE.
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23 Bacula® is a registered trademark of John Walker.
24 The licensor of Bacula is the Free Software Foundation Europe
25 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
26 Switzerland, email:ftf@fsfeurope.org.
30 * Higher Level Device routines.
31 * Knows about Bacula tape labels and such
33 * NOTE! In general, subroutines that have the word
34 * "device" in the name do locking. Subroutines
35 * that have the word "dev" in the name do not
36 * do locking. Thus if xxx_device() calls
37 * yyy_dev(), all is OK, but if xxx_device()
38 * calls yyy_device(), everything will hang.
39 * Obviously, no zzz_dev() is allowed to call
40 * a www_device() or everything falls apart.
42 * Concerning the routines dev->r_dlock() and block_device()
43 * see the end of this module for details. In general,
44 * blocking a device leaves it in a state where all threads
45 * other than the current thread block when they attempt to
46 * lock the device. They remain suspended (blocked) until the device
47 * is unblocked. So, a device is blocked during an operation
48 * that takes a long time (initialization, mounting a new
49 * volume, ...). Locking a device is done for an operation
50 * that takes a short time, such as writing data to the device.
54 * Kern Sibbald, MM, MMI
59 #include "bacula.h" /* pull in global headers */
60 #include "stored.h" /* pull in Storage Daemon headers */
/*
 * Debug level constant defined by this module.
 * NOTE(review): the Dmsg calls visible in this file pass literal levels or
 * sd_dbglvl rather than dbglvl -- confirm where dbglvl is actually used.
 */
65 const int dbglvl = 500;
69 /* Forward referenced functions */
72 * This is the dreaded moment. We either have an end of
73 * medium condition or worse, an error condition.
74 * Attempt to "recover" by obtaining a new Volume.
76 * Here are a few things to know:
77 * dcr->VolCatInfo contains the info on the "current" tape for this job.
78 * dev->VolCatInfo contains the info on the tape in the drive.
79 * The tape in the drive could have changed several times since
80 * the last time the job used it (jcr->VolCatInfo).
81 * dcr->VolumeName is the name of the current/desired tape in the drive.
83 * We enter with device locked, and
84 * exit with device locked.
86 * Note, we are called only from one place in block.c for the daemons.
87 * The btape utility calls it from btape.c.
89 * Returns: true on success
/*
 * fixup_device_block_write_error: recover from a failed block write by
 * moving the job onto the next writable Volume.
 *
 * As far as the visible statements show, the routine:
 *  - blocks the device with BST_DOING_ACQUIRE,
 *  - records the current Volume name as the previous Volume name,
 *  - swaps a freshly allocated label block into dcr->block,
 *  - reports end of medium, then calls mount_next_write_volume(),
 *  - writes the label block, copies the new Volume name to the attached
 *    dcrs, and finally writes the overflow block.
 *
 * Returns false when the mount or either block write fails, true otherwise.
 *
 * NOTE(review): jcr, label_blk, b1, b2, wait_time, mdcr and be are used
 * below but their declarations are not visible in this view; neither are
 * the unlock call announced by the "Unlock, but leave BLOCKED" comment nor
 * the point where dcr->block is set back to the saved block. Confirm
 * against the complete file.
 */
92 bool fixup_device_block_write_error(DCR *dcr)
94 char PrevVolName[MAX_NAME_LENGTH];
96 DEV_BLOCK *block = dcr->block; /* remember the block the caller was writing */
99 char dt[MAX_TIME_LENGTH];
101 DEVICE *dev = dcr->dev;
103 wait_time = time(NULL); /* start of the wait; used to correct run_time below */
105 Dmsg0(100, "Enter fixup_device_block_write_error\n");
107 block_device(dev, BST_DOING_ACQUIRE);
108 /* Unlock, but leave BLOCKED */
111 bstrncpy(PrevVolName, dev->VolCatInfo.VolCatName, sizeof(PrevVolName));
112 bstrncpy(dev->VolHdr.PrevVolumeName, PrevVolName, sizeof(dev->VolHdr.PrevVolumeName));
114 label_blk = new_block(dev);
115 dcr->block = label_blk; /* label block replaces the caller's block for now */
117 /* Inform User about end of medium */
118 Jmsg(jcr, M_INFO, 0, _("End of medium on Volume \"%s\" Bytes=%s Blocks=%s at %s.\n"),
119 PrevVolName, edit_uint64_with_commas(dev->VolCatInfo.VolCatBytes, b1),
120 edit_uint64_with_commas(dev->VolCatInfo.VolCatBlocks, b2),
121 bstrftime(dt, sizeof(dt), time(NULL)));
123 if (!mount_next_write_volume(dcr, 1)) {
124 free_block(label_blk);
128 return false; /* device locked */
130 dev->dlock(); /* lock again */
132 dev->VolCatInfo.VolCatJobs++; /* increment number of jobs on vol */
133 dir_update_volume_info(dcr, false); /* send Volume info to Director */
135 Jmsg(jcr, M_INFO, 0, _("New volume \"%s\" mounted on device %s at %s.\n"),
136 dcr->VolumeName, dev->print_name(), bstrftime(dt, sizeof(dt), time(NULL)));
139 * If this is a new tape, the label_blk will contain the
140 * label, so write it now. If this is a previously
141 * used tape, mount_next_write_volume() will return an
142 * empty label_blk, and nothing will be written.
144 Dmsg0(190, "write label block to dev\n");
145 if (!write_block_to_dev(dcr)) {
147 Pmsg1(0, _("write_block_to_device Volume label failed. ERR=%s"),
148 be.strerror(dev->dev_errno));
149 free_block(label_blk);
152 return false; /* device locked */
154 free_block(label_blk);
158 * Walk through all attached jcrs indicating the volume has changed
160 Dmsg1(100, "Walk attached dcrs. Volume=%s\n", dev->VolCatInfo.VolCatName);
162 foreach_dlist(mdcr, dev->attached_dcrs) {
163 JCR *mjcr = mdcr->jcr;
164 if (mjcr->JobId == 0) {
165 continue; /* ignore console */
169 bstrncpy(mdcr->VolumeName, dcr->VolumeName, sizeof(mdcr->VolumeName));
173 /* Clear NewVol now because dir_get_volume_info() already done */
174 jcr->dcr->NewVol = false;
175 set_new_volume_parameters(dcr);
177 jcr->run_time += time(NULL) - wait_time; /* correct run time for mount wait */
179 /* Write overflow block to device */
180 Dmsg0(190, "Write overflow block to dev\n");
181 if (!write_block_to_dev(dcr)) {
183 Pmsg1(0, _("write_block_to_device overflow block failed. ERR=%s"),
184 be.strerror(dev->dev_errno));
186 return false; /* device locked */
190 return true; /* device locked */
194 * We have a new Volume mounted, so reset the Volume parameters
195 * concerning this job. The global changes were made earlier
196 * in the dev structure.
/*
 * set_new_volume_parameters: reset the per-job Volume bookkeeping held in
 * the dcr once a new Volume is in use.
 *
 * When dcr->NewVol is set, Volume information is first requested with
 * dir_get_volume_info(); a failure is reported through Jmsg1 using
 * jcr->errmsg. The start position is then taken from the device, the
 * first/last index fields are cleared, jcr->NumWriteVolumes is incremented
 * and WroteVol is cleared.
 *
 * NOTE(review): the declaration of jcr and the else line separating the
 * tape and non-tape assignments are not visible in this view.
 */
198 void set_new_volume_parameters(DCR *dcr)
201 DEVICE *dev = dcr->dev;
202 if (dcr->NewVol && !dir_get_volume_info(dcr, GET_VOL_INFO_FOR_WRITE)) {
203 Jmsg1(jcr, M_ERROR, 0, "%s", jcr->errmsg);
205 /* Set new start/end positions */
206 if (dev->is_tape()) {
207 dcr->StartBlock = dev->block_num;
208 dcr->StartFile = dev->file;
210 dcr->StartBlock = (uint32_t)dev->file_addr; /* low 32 bits of the address */
211 dcr->StartFile = (uint32_t)(dev->file_addr >> 32); /* bits 32..63 of the address */
214 dcr->VolFirstIndex = 0;
215 dcr->VolLastIndex = 0;
216 jcr->NumWriteVolumes++;
218 dcr->WroteVol = false;
222 * We are now in a new Volume file, so reset the Volume parameters
223 * concerning this job. The global changes were made earlier
224 * in the dev structure.
/*
 * set_new_file_parameters: reset the per-job position bookkeeping held in
 * the dcr after moving to a new file on the Volume.
 *
 * The start position is taken from the device, the first/last index fields
 * are cleared, and the NewFile and WroteVol flags are cleared.
 *
 * NOTE(review): the else line separating the tape and non-tape assignments
 * is not visible in this view.
 */
226 void set_new_file_parameters(DCR *dcr)
228 DEVICE *dev = dcr->dev;
230 /* Set new start/end positions */
231 if (dev->is_tape()) {
232 dcr->StartBlock = dev->block_num;
233 dcr->StartFile = dev->file;
235 dcr->StartBlock = (uint32_t)dev->file_addr; /* low 32 bits of the address */
236 dcr->StartFile = (uint32_t)(dev->file_addr >> 32); /* bits 32..63 of the address */
239 dcr->VolFirstIndex = 0;
240 dcr->VolLastIndex = 0;
241 dcr->NewFile = false;
242 dcr->WroteVol = false;
248 * First Open of the device. Expect dev to already be initialized.
250 * This routine is used only when the Storage daemon starts
251 * and always_open is set, and in the stand-alone utility
252 * routines such as bextract.
254 * Note, opening of a normal file is deferred to later so
255 * that we can get the filename; the device_name for
256 * a file is the directory only.
258 * Returns: false on failure
/*
 * first_open_device: open the device attached to dcr for the first time.
 *
 * Devices for which dev->is_tape() is false are not opened here (the open
 * is deferred). Otherwise the open mode is OPEN_WRITE_ONLY when the device
 * has CAP_STREAM; OPEN_READ_ONLY is the other mode assigned. A negative
 * result from dev->open() is reported with Emsg1 at M_FATAL.
 *
 * NOTE(review): the declaration of mode, the else line between the two
 * mode assignments, any locking, and the return statements are not visible
 * in this view. The first trace message names open_output_device() rather
 * than this function -- confirm whether that is intentional.
 */
261 bool first_open_device(DCR *dcr)
263 DEVICE *dev = dcr->dev;
266 Dmsg0(120, "start open_output_device()\n");
273 /* Defer opening files */
274 if (!dev->is_tape()) {
275 Dmsg0(129, "Device is file, deferring open.\n");
280 if (dev->has_cap(CAP_STREAM)) {
281 mode = OPEN_WRITE_ONLY;
283 mode = OPEN_READ_ONLY;
285 Dmsg0(129, "Opening device.\n");
286 if (dev->open(dcr, mode) < 0) {
287 Emsg1(M_FATAL, 0, _("dev open failed: %s\n"), dev->errmsg);
291 Dmsg1(129, "open dev %s OK\n", dev->print_name());
299 * Make sure device is open, if not do so
/*
 * open_device: open the device attached to dcr.
 *
 * The open mode is OPEN_WRITE_ONLY when the device has CAP_STREAM;
 * OPEN_READ_WRITE is the other mode assigned. When dev->open() returns a
 * negative value, the failure is reported (Jmsg2 at M_FATAL plus Pmsg2)
 * only if the device is not polling, is not a DVD and is not removable.
 *
 * NOTE(review): the declaration of mode, the else line between the two
 * mode assignments, and the return statements are not visible in this
 * view.
 */
301 bool open_device(DCR *dcr)
303 DEVICE *dev = dcr->dev;
306 if (dev->has_cap(CAP_STREAM)) {
307 mode = OPEN_WRITE_ONLY;
309 mode = OPEN_READ_WRITE;
311 if (dev->open(dcr, mode) < 0) {
312 /* If polling, ignore the error */
313 /* If DVD, also ignore the error, very often you cannot open the device
314 * (when there is no DVD, or when the one inserted is a wrong one) */
315 if (!dev->poll && !dev->is_dvd() && !dev->is_removable()) {
316 Jmsg2(dcr->jcr, M_FATAL, 0, _("Unable to open device %s: ERR=%s\n"),
317 dev->print_name(), dev->bstrerror());
318 Pmsg2(000, _("Unable to open archive %s: ERR=%s\n"),
319 dev->print_name(), dev->bstrerror());
327 * Find which JobId corresponds to the current thread
/*
 * get_jobid_from_tid: report the JobId of the job running on the calling
 * thread.
 *
 * The visible statements compare jcr->my_thread_id with pthread_self()
 * and, on a match, copy jcr->JobId into JobId.
 *
 * NOTE(review): the declarations of jcr and JobId, the iteration that
 * supplies jcr, and the return statement are not visible in this view --
 * confirm the value returned when no job matches.
 */
329 uint32_t get_jobid_from_tid()
334 if (pthread_equal(jcr->my_thread_id, pthread_self())) {
335 JobId = (uint32_t)jcr->JobId;
344 * Check whether the device is in one of the unmounted states
/*
 * is_device_unmounted: test whether the device's blocked state is
 * BST_UNMOUNTED or BST_UNMOUNTED_WAITING_FOR_SYSOP.
 *
 * NOTE(review): the declaration of stat and the return statement are not
 * visible in this view; presumably stat is what is returned.
 */
346 bool is_device_unmounted(DEVICE *dev)
349 int blocked = dev->blocked(); /* read the state once, then compare twice */
350 stat = (blocked == BST_UNMOUNTED) ||
351 (blocked == BST_UNMOUNTED_WAITING_FOR_SYSOP);
/*
 * DEVICE::_dlock: traced form of the device lock call; file and line
 * identify the call site and are only used in the trace output.
 *
 * Visible behaviour: traces the call site, m_count and the calling job's
 * JobId; if m_count is already positive and m_pid is the calling thread,
 * traces a "DEADLOCK" message; then records the calling thread in m_pid.
 *
 * NOTE(review): the mutex acquisition and the m_count update are not
 * visible in this view -- confirm their position relative to the check.
 */
356 void DEVICE::_dlock(const char *file, int line)
358 Dmsg4(sd_dbglvl, "dlock from %s:%d precnt=%d JobId=%u\n", file, line,
359 m_count, get_jobid_from_tid());
360 /* Note, this *really* should be protected by a mutex, but
361 * since it is only debug code we don't worry too much.
363 if (m_count > 0 && pthread_equal(m_pid, pthread_self())) {
364 Dmsg2(sd_dbglvl, "DEADLOCK !!!!!!!!!! from %s:%d\n", file, line);
367 m_pid = pthread_self();
/*
 * DEVICE::_dunlock: traced form of the device unlock call; file and line
 * identify the call site and are only used in the trace output.
 *
 * Visible behaviour: traces the call site, m_count ("postcnt") and the
 * calling job's JobId.
 *
 * NOTE(review): the m_count update and the mutex release are not visible
 * in this view.
 */
371 void DEVICE::_dunlock(const char *file, int line)
374 Dmsg4(sd_dbglvl, "dunlock from %s:%d postcnt=%d JobId=%u\n", file, line,
375 m_count, get_jobid_from_tid());
/*
 * DEVICE::_r_dunlock: counterpart of the r_dlock routines; it simply
 * forwards the call site (file, line) to _dunlock().
 */
379 void DEVICE::_r_dunlock(const char *file, int line)
381 this->_dunlock(file, line);
388 * This is a recursive lock that checks if the device is blocked.
390 * When blocked is set, all threads EXCEPT thread with id no_wait_id
391 * must wait. The no_wait_id thread is out obtaining a new volume
392 * and preparing the label.
/*
 * DEVICE::_r_dlock / DEVICE::r_dlock: lock the device, waiting while it is
 * blocked by another thread.
 *
 * If the device is blocked and the caller is not the thread recorded in
 * no_wait_id, the caller counts itself in num_waiting and waits on the
 * "wait" condition variable (with m_mutex) until blocked() is false. A
 * non-zero result from pthread_cond_wait() is reported with Emsg1 at
 * M_ABORT.
 *
 * NOTE(review): two signatures appear back to back; presumably they are
 * selected by preprocessor conditionals that are not visible in this view,
 * as are the declaration of stat and the initial mutex acquisition that
 * pthread_cond_wait() requires. The trace call below formats pthread_t
 * values with %p; pthread_t is an opaque type, so confirm this is valid on
 * all supported platforms.
 */
395 void DEVICE::_r_dlock(const char *file, int line)
397 void DEVICE::r_dlock()
402 Dmsg4(sd_dbglvl, "r_dlock blked=%s from %s:%d JobId=%u\n", this->print_blocked(),
403 file, line, get_jobid_from_tid());
405 Dmsg1(sd_dbglvl, "reclock blked=%s\n", this->print_blocked());
408 if (this->blocked() && !pthread_equal(this->no_wait_id, pthread_self())) {
409 this->num_waiting++; /* indicate that I am waiting */
410 while (this->blocked()) {
411 Dmsg3(sd_dbglvl, "r_dlock blked=%s no_wait=%p me=%p\n", this->print_blocked(),
412 this->no_wait_id, pthread_self());
413 if ((stat = pthread_cond_wait(&this->wait, &m_mutex)) != 0) {
416 Emsg1(M_ABORT, 0, _("pthread_cond_wait failure. ERR=%s\n"),
420 this->num_waiting--; /* no longer waiting */
425 * Block all other threads from using the device
426 * Device must already be locked. After this call,
427 * the device is blocked to any thread calling dev->r_dlock(),
428 * but the device is not locked (i.e. no P on device). Also,
429 * the current thread can slip through the dev->r_dlock()
430 * calls without blocking.
/*
 * _block_device: mark the device as blocked in the given state on behalf
 * of the calling thread.
 *
 * Asserts that the device is currently BST_NOT_BLOCKED, stores the new
 * state, and records the caller in no_wait_id. file and line identify the
 * call site and are only used in the trace output.
 */
432 void _block_device(const char *file, int line, DEVICE *dev, int state)
434 ASSERT(dev->blocked() == BST_NOT_BLOCKED);
435 dev->set_blocked(state); /* make other threads wait */
436 dev->no_wait_id = pthread_self(); /* allow us to continue */
437 Dmsg3(sd_dbglvl, "set blocked=%s from %s:%d\n", dev->print_blocked(), file, line);
441 * Unblock the device, and wake up anyone who went to sleep.
442 * Enter: device locked
443 * Exit: device locked
/*
 * _unblock_device: clear the device's blocked state and wake any waiters.
 *
 * Asserts that the device is currently blocked, sets the state to
 * BST_NOT_BLOCKED, and broadcasts on the "wait" condition variable when
 * num_waiting is positive. file and line identify the call site and are
 * only used in the trace output.
 *
 * NOTE(review): original line 450 is not visible in this view; confirm
 * whether no_wait_id is also reset there.
 */
445 void _unblock_device(const char *file, int line, DEVICE *dev)
447 Dmsg3(sd_dbglvl, "unblock %s from %s:%d\n", dev->print_blocked(), file, line);
448 ASSERT(dev->blocked());
449 dev->set_blocked(BST_NOT_BLOCKED);
451 if (dev->num_waiting > 0) {
452 pthread_cond_broadcast(&dev->wait); /* wake them up */
457 * Enter with device locked and blocked
458 * Exit with device unlocked and blocked by us.
/*
 * _steal_device_lock: take over a blocked device for the calling thread,
 * saving the previous owner's state in *hold.
 *
 * Saves the current blocked state, dev_prev_blocked and no_wait_id into
 * hold, then sets the blocked state to "state" and records the caller in
 * no_wait_id. file and line identify the call site for the trace output.
 *
 * NOTE(review): the continuation of the first Dmsg3 call and the unlock
 * implied by "Exit with device unlocked" are not visible in this view.
 */
460 void _steal_device_lock(const char *file, int line, DEVICE *dev, bsteal_lock_t *hold, int state)
463 Dmsg3(sd_dbglvl, "steal lock. old=%s from %s:%d\n", dev->print_blocked(),
465 hold->dev_blocked = dev->blocked();
466 hold->dev_prev_blocked = dev->dev_prev_blocked;
467 hold->no_wait_id = dev->no_wait_id;
468 dev->set_blocked(state);
469 Dmsg1(sd_dbglvl, "steal lock. new=%s\n", dev->print_blocked());
470 dev->no_wait_id = pthread_self();
475 * Enter with device blocked by us but not locked
476 * Exit with device locked, and blocked by previous owner
/*
 * _give_back_device_lock: restore the state saved in *hold by
 * _steal_device_lock.
 *
 * Restores the blocked state, dev_prev_blocked and no_wait_id from hold,
 * then broadcasts on the "wait" condition variable when num_waiting is
 * positive. file and line identify the call site for the trace output.
 *
 * NOTE(review): the re-lock implied by "Exit with device locked" (original
 * line 482) is not visible in this view.
 */
478 void _give_back_device_lock(const char *file, int line, DEVICE *dev, bsteal_lock_t *hold)
480 Dmsg3(sd_dbglvl, "return lock. old=%s from %s:%d\n",
481 dev->print_blocked(), file, line);
483 dev->set_blocked(hold->dev_blocked);
484 dev->dev_prev_blocked = hold->dev_prev_blocked;
485 dev->no_wait_id = hold->no_wait_id;
486 Dmsg1(sd_dbglvl, "return lock. new=%s\n", dev->print_blocked());
487 if (dev->num_waiting > 0) {
488 pthread_cond_broadcast(&dev->wait); /* wake them up */