2 Bacula® - The Network Backup Solution
4 Copyright (C) 2000-2011 Free Software Foundation Europe e.V.
6 The main author of Bacula is Kern Sibbald, with contributions from
7 many others, a complete list can be found in the file AUTHORS.
8 This program is Free Software; you can redistribute it and/or
9 modify it under the terms of version three of the GNU Affero General Public
10 License as published by the Free Software Foundation and included
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU Affero General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23 Bacula® is a registered trademark of Kern Sibbald.
24 The licensor of Bacula is the Free Software Foundation Europe
25 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
26 Switzerland, email:ftf@fsfeurope.org.
29 * Subroutines to handle waiting for operator intervention
30 * or waiting for a Device to be released
32 * Code for wait_for_sysop() pulled from askdir.c
34 * Kern Sibbald, March 2005
39 #include "bacula.h" /* pull in global headers */
40 #include "stored.h" /* pull in Storage Daemon headers */
/* Debug level passed as the first argument of the Dmsg calls in this file. */
42 const int dbglvl = 400;
45 * Wait for SysOp to mount a tape on a specific device
47 * Returns: W_ERROR, W_TIMEOUT, W_POLL, W_MOUNT, or W_WAKE
/*
 * wait_for_sysop() -- sleep on the condition variable dev->wait_next_vol
 * (with dev->m_mutex) until the device attached to dcr changes state or
 * the remaining wait time, dev->rem_wait_sec, is used up.
 *
 *   dcr  device control record; dcr->dev is the device that is waited on.
 *
 * Each sleep is capped at me->heartbeat_interval seconds when that value
 * is non-zero, so that BNET_HEARTBEAT can be signalled on jcr->file_bsock
 * and jcr->dir_bsock, and at dev->vol_poll_interval seconds when the
 * device is not unmounted and a poll interval is set.
 *
 * The result is carried in `stat`; only the W_ERROR and W_WAKE
 * assignments are visible in this listing.
 * NOTE(review): the embedded line numbers skip values, so several
 * statements (local declarations, how `jcr`, `now` and `start` are set,
 * the loop exits and the return) are not shown here -- confirm against
 * the complete file before relying on this description.
 */
49 int wait_for_sysop(DCR *dcr)
53 struct timespec timeout;
54 time_t last_heartbeat = 0;
55 time_t first_start = time(NULL);
59 DEVICE *dev = dcr->dev;
63 Dmsg1(dbglvl, "Enter blocked=%s\n", dev->print_blocked());
66 * Since we want to mount a tape, make sure current one is
67 * not marked as using this drive.
71 unmounted = dev->is_device_unmounted();
74 * Wait requested time (dev->rem_wait_sec). However, we also wake up every
75 * HB_TIME seconds and send a heartbeat to the FD and the Director
76 * to keep stateful firewalls from closing them down while waiting
/* Length of the first sleep: the remaining wait time, capped below. */
79 add_wait = dev->rem_wait_sec;
80 if (me->heartbeat_interval && add_wait > me->heartbeat_interval) {
81 add_wait = me->heartbeat_interval;
83 /* If the user did not unmount the tape and we are polling, ensure
84 * that we poll at the correct interval.
86 if (!unmounted && dev->vol_poll_interval && add_wait > dev->vol_poll_interval) {
87 add_wait = dev->vol_poll_interval;
91 Dmsg1(dbglvl, "blocked=%s\n", dev->print_blocked());
92 dev->dev_prev_blocked = dev->blocked();
93 dev->set_blocked(BST_WAITING_FOR_SYSOP); /* indicate waiting for mount */
/* Keep sleeping for as long as the job has not been canceled. */
96 for ( ; !job_canceled(jcr); ) {
97 time_t now, start, total_waited;
/* Build the absolute deadline for pthread_cond_timedwait(): current
 * time of day plus add_wait seconds, microseconds turned into
 * nanoseconds. */
99 gettimeofday(&tv, &tz);
100 timeout.tv_nsec = tv.tv_usec * 1000;
101 timeout.tv_sec = tv.tv_sec + add_wait;
103 Dmsg4(dbglvl, "I'm going to sleep on device %s. HB=%d rem_wait=%d add_wait=%d\n",
104 dev->print_name(), (int)me->heartbeat_interval, dev->rem_wait_sec, add_wait);
107 /* Wait required time */
108 stat = pthread_cond_timedwait(&dev->wait_next_vol, &dev->m_mutex, &timeout);
110 Dmsg2(dbglvl, "Wokeup from sleep on device stat=%d blocked=%s\n", stat,
111 dev->print_blocked());
/* total_waited is measured from entry to this function (first_start);
 * (now - start), presumably the length of this sleep, is deducted from
 * dev->rem_wait_sec.
 * NOTE(review): the assignments of `now` and `start` are not visible
 * in this listing. */
113 total_waited = now - first_start;
114 dev->rem_wait_sec -= (now - start);
116 /* Note, this always triggers the first time. We want that. */
117 if (me->heartbeat_interval) {
118 if (now - last_heartbeat >= me->heartbeat_interval) {
119 /* send heartbeats */
120 if (jcr->file_bsock) {
121 jcr->file_bsock->signal(BNET_HEARTBEAT);
122 Dmsg0(dbglvl, "Send heartbeat to FD.\n");
124 if (jcr->dir_bsock) {
125 jcr->dir_bsock->signal(BNET_HEARTBEAT);
127 last_heartbeat = now;
/* EINVAL from pthread_cond_timedwait() is reported as a fatal job
 * message and turned into W_ERROR. */
131 if (stat == EINVAL) {
133 Jmsg1(jcr, M_FATAL, 0, _("pthread timedwait error. ERR=%s\n"), be.bstrerror(stat));
134 stat = W_ERROR; /* error */
139 * Continue waiting if operator is labeling volumes
141 if (dev->blocked() == BST_WRITING_LABEL) {
145 if (dev->rem_wait_sec <= 0) { /* on exceeding wait time return */
146 Dmsg0(dbglvl, "Exceed wait time.\n");
152 * Check if user unmounted the device while we were waiting
154 unmounted = dev->is_device_unmounted();
/* A poll event is flagged on the device once the total time waited
 * reaches the poll interval, provided the device is not unmounted. */
156 if (!unmounted && dev->vol_poll_interval &&
157 (total_waited >= dev->vol_poll_interval)) {
158 Dmsg1(dbglvl, "poll return in wait blocked=%s\n", dev->print_blocked());
159 dev->poll = true; /* returning a poll event */
164 * Check if user mounted the device while we were waiting
166 if (dev->blocked() == BST_MOUNT) { /* mount request ? */
167 Dmsg0(dbglvl, "Mounted return.\n");
173 * If we did not timeout, then some event happened, so
174 * return to check if state changed.
176 if (stat != ETIMEDOUT) {
178 Dmsg2(dbglvl, "Wake return. stat=%d. ERR=%s\n", stat, be.bstrerror(stat));
179 stat = W_WAKE; /* someone woke us */
184 * At this point, we know we woke up because of a timeout,
185 * that was due to a heartbeat, because any other reason would
186 * have caused us to return, so update the wait counters and continue.
/* Length of the next sleep: recomputed with the same caps as before the
 * loop, except that the poll cap is reduced by the time already waited. */
188 add_wait = dev->rem_wait_sec;
189 if (me->heartbeat_interval && add_wait > me->heartbeat_interval) {
190 add_wait = me->heartbeat_interval;
192 /* If the user did not unmount the tape and we are polling, ensure
193 * that we poll at the correct interval.
195 if (!unmounted && dev->vol_poll_interval &&
196 add_wait > dev->vol_poll_interval - total_waited) {
197 add_wait = dev->vol_poll_interval - total_waited;
205 dev->set_blocked(dev->dev_prev_blocked); /* restore entry state */
206 Dmsg1(dbglvl, "set %s\n", dev->print_blocked());
208 Dmsg1(dbglvl, "Exit blocked=%s\n", dev->print_blocked());
215 * Wait for any device to be released, then we return, so
216 * higher level code can rescan possible devices. Since there
217 * could be a job waiting for a drive to free up, we wait a maximum
218 * of 1 minute then retry just in case a broadcast was lost, and
219 * we return to rescan the devices.
221 * Returns: true if a device has changed state
222 * false if the total wait time has expired.
/*
 * wait_for_device() -- wait once on the condition variable
 * wait_device_release, using device_release_mutex, for at most
 * max_wait_time seconds.
 *
 *   jcr      job on whose behalf we wait; its JobId and Job name are
 *            used in the operator message.
 *   retries  caller-owned counter, incremented on every call; on every
 *            fifth call an M_MOUNT message is issued through Jmsg.
 *
 * NOTE(review): `ok` (logged below and presumably the return value) and
 * the locals tv, tz, stat and ed1 are declared or set in lines that are
 * not visible in this listing -- confirm against the complete file.
 */
224 bool wait_for_device(JCR *jcr, int &retries)
228 struct timespec timeout;
231 const int max_wait_time = 1 * 60; /* wait 1 minute */
234 Dmsg0(dbglvl, "Enter wait_for_device\n");
/* P() and V() bracket the wait -- presumably lock and unlock of the
 * mutex that is also handed to pthread_cond_timedwait() below. */
235 P(device_release_mutex);
/* Each call waits at most max_wait_time seconds, so a message on every
 * fifth call is roughly one per five minutes if the caller retries at
 * once -- TODO confirm against the caller. */
237 if (++retries % 5 == 0) {
238 /* Print message every 5 minutes */
239 Jmsg(jcr, M_MOUNT, 0, _("JobId=%s, Job %s waiting to reserve a device.\n"),
240 edit_uint64(jcr->JobId, ed1), jcr->Job);
/* Absolute deadline: current time of day plus max_wait_time seconds,
 * microseconds turned into nanoseconds. */
243 gettimeofday(&tv, &tz);
244 timeout.tv_nsec = tv.tv_usec * 1000;
245 timeout.tv_sec = tv.tv_sec + max_wait_time;
247 Dmsg0(dbglvl, "Going to wait for a device.\n");
249 /* Wait required time */
250 stat = pthread_cond_timedwait(&wait_device_release, &device_release_mutex, &timeout);
251 Dmsg1(dbglvl, "Wokeup from sleep on device stat=%d\n", stat);
253 V(device_release_mutex);
254 Dmsg1(dbglvl, "Return from wait_device ok=%d\n", ok);
260 * The jcr timers are used for waiting on any device
261 * Returns: true if time doubled
262 * false if max time expired
264 static bool double_jcr_wait_time(JCR *jcr)
266 jcr->wait_sec *= 2; /* double wait time */
267 if (jcr->wait_sec > jcr->max_wait) { /* but not longer than maxtime */
268 jcr->wait_sec = jcr->max_wait;
271 jcr->rem_wait_sec = jcr->wait_sec;
272 if (jcr->num_wait >= jcr->max_num_wait) {