/*
   Bacula® - The Network Backup Solution

   Copyright (C) 2000-2009 Free Software Foundation Europe e.V.

   The main author of Bacula is Kern Sibbald, with contributions from
   many others, a complete list can be found in the file AUTHORS.
   This program is Free Software; you can redistribute it and/or
   modify it under the terms of version three of the GNU Affero General Public
   License as published by the Free Software Foundation and included
   in the file LICENSE.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU Affero General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   Bacula® is a registered trademark of Kern Sibbald.
   The licensor of Bacula is the Free Software Foundation Europe
   (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
   Switzerland, email:ftf@fsfeurope.org.
*/
/*
 *  Subroutines to handle waiting for operator intervention
 *   or waiting for a Device to be released
 *
 *  Code for wait_for_sysop() pulled from askdir.c
 *
 *   Kern Sibbald, March 2005
 */
40 #include "bacula.h" /* pull in global headers */
41 #include "stored.h" /* pull in Storage Deamon headers */
/* Debug level passed as the first argument of every Dmsg*() call in this file */
const int dbglvl = 400;
46 * Wait for SysOp to mount a tape on a specific device
48 * Returns: W_ERROR, W_TIMEOUT, W_POLL, W_MOUNT, or W_WAKE
50 int wait_for_sysop(DCR *dcr)
54 struct timespec timeout;
55 time_t last_heartbeat = 0;
56 time_t first_start = time(NULL);
60 DEVICE *dev = dcr->dev;
64 Dmsg1(dbglvl, "Enter blocked=%s\n", dev->print_blocked());
67 * Since we want to mount a tape, make sure current one is
68 * not marked as using this drive.
72 unmounted = dev->is_device_unmounted();
75 * Wait requested time (dev->rem_wait_sec). However, we also wake up every
76 * HB_TIME seconds and send a heartbeat to the FD and the Director
77 * to keep stateful firewalls from closing them down while waiting
80 add_wait = dev->rem_wait_sec;
81 if (me->heartbeat_interval && add_wait > me->heartbeat_interval) {
82 add_wait = me->heartbeat_interval;
84 /* If the user did not unmount the tape and we are polling, ensure
85 * that we poll at the correct interval.
87 if (!unmounted && dev->vol_poll_interval && add_wait > dev->vol_poll_interval) {
88 add_wait = dev->vol_poll_interval;
92 Dmsg1(dbglvl, "blocked=%s\n", dev->print_blocked());
93 dev->dev_prev_blocked = dev->blocked();
94 dev->set_blocked(BST_WAITING_FOR_SYSOP); /* indicate waiting for mount */
97 for ( ; !job_canceled(jcr); ) {
98 time_t now, start, total_waited;
100 gettimeofday(&tv, &tz);
101 timeout.tv_nsec = tv.tv_usec * 1000;
102 timeout.tv_sec = tv.tv_sec + add_wait;
104 Dmsg4(dbglvl, "I'm going to sleep on device %s. HB=%d rem_wait=%d add_wait=%d\n",
105 dev->print_name(), (int)me->heartbeat_interval, dev->rem_wait_sec, add_wait);
108 /* Wait required time */
109 stat = pthread_cond_timedwait(&dev->wait_next_vol, &dev->m_mutex, &timeout);
111 Dmsg2(dbglvl, "Wokeup from sleep on device stat=%d blocked=%s\n", stat,
112 dev->print_blocked());
114 total_waited = now - first_start;
115 dev->rem_wait_sec -= (now - start);
117 /* Note, this always triggers the first time. We want that. */
118 if (me->heartbeat_interval) {
119 if (now - last_heartbeat >= me->heartbeat_interval) {
120 /* send heartbeats */
121 if (jcr->file_bsock) {
122 jcr->file_bsock->signal(BNET_HEARTBEAT);
123 Dmsg0(dbglvl, "Send heartbeat to FD.\n");
125 if (jcr->dir_bsock) {
126 jcr->dir_bsock->signal(BNET_HEARTBEAT);
128 last_heartbeat = now;
132 if (stat == EINVAL) {
134 Jmsg1(jcr, M_FATAL, 0, _("pthread timedwait error. ERR=%s\n"), be.bstrerror(stat));
135 stat = W_ERROR; /* error */
139 if (dev->rem_wait_sec <= 0) { /* on exceeding wait time return */
140 Dmsg0(dbglvl, "Exceed wait time.\n");
146 * Check if user unmounted the device while we were waiting
148 unmounted = dev->is_device_unmounted();
150 if (!unmounted && dev->vol_poll_interval &&
151 (total_waited >= dev->vol_poll_interval)) {
152 Dmsg1(dbglvl, "poll return in wait blocked=%s\n", dev->print_blocked());
153 dev->poll = true; /* returning a poll event */
158 * Check if user mounted the device while we were waiting
160 if (dev->blocked() == BST_MOUNT) { /* mount request ? */
161 Dmsg0(dbglvl, "Mounted return.\n");
167 * If we did not timeout, then some event happened, so
168 * return to check if state changed.
170 if (stat != ETIMEDOUT) {
172 Dmsg2(dbglvl, "Wake return. stat=%d. ERR=%s\n", stat, be.bstrerror(stat));
173 stat = W_WAKE; /* someone woke us */
178 * At this point, we know we woke up because of a timeout,
179 * that was due to a heartbeat, because any other reason would
180 * have caused us to return, so update the wait counters and continue.
182 add_wait = dev->rem_wait_sec;
183 if (me->heartbeat_interval && add_wait > me->heartbeat_interval) {
184 add_wait = me->heartbeat_interval;
186 /* If the user did not unmount the tape and we are polling, ensure
187 * that we poll at the correct interval.
189 if (!unmounted && dev->vol_poll_interval &&
190 add_wait > dev->vol_poll_interval - total_waited) {
191 add_wait = dev->vol_poll_interval - total_waited;
199 dev->set_blocked(dev->dev_prev_blocked); /* restore entry state */
200 Dmsg1(dbglvl, "set %s\n", dev->print_blocked());
202 Dmsg1(dbglvl, "Exit blocked=%s\n", dev->print_blocked());
209 * Wait for any device to be released, then we return, so
210 * higher level code can rescan possible devices. Since there
211 * could be a job waiting for a drive to free up, we wait a maximum
212 * of 1 minute then retry just in case a broadcast was lost, and
213 * we return to rescan the devices.
215 * Returns: true if a device has changed state
216 * false if the total wait time has expired.
218 bool wait_for_device(JCR *jcr, int &retries)
222 struct timespec timeout;
225 const int max_wait_time = 1 * 60; /* wait 1 minute */
228 Dmsg0(dbglvl, "Enter wait_for_device\n");
229 P(device_release_mutex);
231 if (++retries % 5 == 0) {
232 /* Print message every 5 minutes */
233 Jmsg(jcr, M_MOUNT, 0, _("JobId=%s, Job %s waiting to reserve a device.\n"),
234 edit_uint64(jcr->JobId, ed1), jcr->Job);
237 gettimeofday(&tv, &tz);
238 timeout.tv_nsec = tv.tv_usec * 1000;
239 timeout.tv_sec = tv.tv_sec + max_wait_time;
241 Dmsg0(dbglvl, "Going to wait for a device.\n");
243 /* Wait required time */
244 stat = pthread_cond_timedwait(&wait_device_release, &device_release_mutex, &timeout);
245 Dmsg1(dbglvl, "Wokeup from sleep on device stat=%d\n", stat);
247 V(device_release_mutex);
248 Dmsg1(dbglvl, "Return from wait_device ok=%d\n", ok);
/*
 * The jcr timers are used for waiting on any device.
 *
 * Returns: true if time doubled
 *          false if max time expired
 */
258 static bool double_jcr_wait_time(JCR *jcr)
260 jcr->wait_sec *= 2; /* double wait time */
261 if (jcr->wait_sec > jcr->max_wait) { /* but not longer than maxtime */
262 jcr->wait_sec = jcr->max_wait;
265 jcr->rem_wait_sec = jcr->wait_sec;
266 if (jcr->num_wait >= jcr->max_num_wait) {